1 Introduction
This WebBook contains the code written to analyse data from the study of the functional profile of metagenome-assembled genomes and temporal trends of derived microbial communities from intensively reared broiler chickens.
1.1 Prepare the R environment
1.1.1 Environment
To reproduce all the analyses locally, clone this repository in your computer using:
RStudio > New Project > Version Control > Git
And indicating the following git repository:
https://github.com/alberdilab/chicken_genome_reduced_bacteria.git
Once the R project has been created, follow the instructions and code chunks shown in this webbook.
1.1.2 Libraries
The following R packages are required for the data analysis.
# Base
library(R.utils)
library(knitr)
library(tidyverse)
library(devtools)
library(rmarkdown)
library(janitor)
# For tree handling
library(ape)
library(phyloseq)
library(phytools)
library(tidytree)
# For plotting
library(ggplot2)
library(ggrepel)
library(ggpubr)
library(ggnewscale)
library(gridExtra)
library(ggtreeExtra)
library(ggtree)
library(ggh4x)
library(sjPlot)
library(colorspace)
library(vioplot)
# For statistics
library(spaa)
library(vegan)
library(Rtsne)
library(microbiome)
library(geiger)
library(hilldiv2)
library(distillR)
library(broom.mixed)
library(Hmsc)
library(MuMIn)
library(corrplot)
library(lme4)
library(nlme)
library(boot)2 Metagenomic data preparation
2.1 Load data
Load the original data files outputted by the bioinformatic pipeline.
2.1.2 Taxonomy of metagenome-assembled genomes
genome_taxonomy <-
read_tsv("data/taxonomy.tsv") %>%
rename(genome = 1) %>%
mutate(phylum = case_when(
phylum == "Actinobacteriota" ~ "Actinomycetota",
phylum == "Firmicutes" ~ "Bacillota",
phylum == "Firmicutes_A" ~ "Bacillota_A",
phylum == "Firmicutes_B" ~ "Bacillota_B",
phylum == "Firmicutes_C" ~ "Bacillota_C",
phylum == "Cyanobacteria" ~ "Cyanobacteriota",
phylum == "Proteobacteria" ~ "Pseudomonadota",
TRUE ~ phylum)) %>%
filter(!domain == "Archaea")2.2 Distilling Genome Inferred Functional Traits (GIFTs)
2.2.1 Distilling DRAM annotations
gifts_raw <- distill(genome_annotations, GIFT_db2,
genomecol = 2, annotcol = c(9,10,19), verbosity = F)
# Load ENA to genome_id table
ena_to_mag_id <- read_tsv("data/ena_to_mag_id.tsv")
gifts_matrix <-
gifts_raw %>%
data.frame() %>%
rownames_to_column(var = 'mag_name') %>%
left_join(ena_to_mag_id %>%
select(mag_name, mag_id),
by = 'mag_name') %>%
select(-mag_name) %>%
relocate(mag_id) %>%
filter(mag_id %in% genome_taxonomy$genome) %>%
column_to_rownames('mag_id') %>%
as.matrix()2.3 Create working objects
Transform the original data files into working objects for downstream analyses.
2.3.3 Prepare a phyloseq object
phylo_samples <-
chicken_metadata %>%
mutate(sampling_time = as.factor(sampling_time),
trial_age = paste(trial, sampling_time, sep = "_"),
trial_age = as.factor(trial_age)) %>%
column_to_rownames("animal_code") %>%
sample_data() # Convert to phyloseq sample_data object
phylo_genome <- otu_table(total, taxa_are_rows = TRUE)
phylo_taxonomy <-
genome_taxonomy %>%
column_to_rownames("genome") %>%
as.matrix() %>%
tax_table() # Convert to phyloseq tax_table object
phylo_tree <- phy_tree(genome_tree)
phylo_seq <- phyloseq(phylo_genome, phylo_taxonomy, phylo_samples, phylo_tree)2.4 Prepare color scheme
# As dataframe
taxonomy_colors <- read_tsv("data/taxonomy_colours.tsv")
# As vector
phylum_colors <-
c(Halobacteriota = "#f2f2f2",
Methanobacteriota = "#bfbfbf",
Patescibacteria = "#dacce3",
Campylobacterota = "#cdadca",
Synergistota = "#c08eb3",
Thermoplasmatota = "#777dae",
Verrucomicrobiota = "#0066ae",
Desulfobacterota = "#68b0dc",
Pseudomonadota = "#60cfde",
Bacteroidota = "#4dc87c",
Cyanobacteriota = "#92e09f",
Actinomycetota = "#c9e0af",
Deferribacterota = "#dff77e",
Bacillota = "#ffd366",
Bacillota_A = "#fd8854",
Bacillota_B = "#8b1222",
Bacillota_C = "#5a0c37")
order_colors <-
c(Methanomicrobiales = "#f2f2f2",
Methanobacteriales = "#bfbfbf",
Saccharimonadales = "#dacce3",
Campylobacterales = "#cdadca",
Synergistales = "#c08eb3",
Methanomassiliicoccales = "#777dae",
Victivallales = "#0066ae",
Opitutales = "#1c7ebc",
Verrucomicrobiales = "#4a96cc",
Desulfovibrionales = "#68b0dc",
Enterobacterales = "#60cfde",
RF32 = "#60dfd2",
Burkholderiales = "#5cdfb5",
'Rs-D84' = "#40df91",
Bacteroidales = "#4dc87c",
Flavobacteriales = "#88c88b",
Gastranaerophilales = "#92e09f",
Coriobacteriales = "#c9e0af",
Actinomycetales = "#d8e093",
Deferribacterales = "#dff77e",
RF39 = "#ecf76d",
Lactobacillales = "#feef68",
'ML615J-28' = "#fde671",
Erysipelotrichales = "#ffd366",
Acholeplasmatales = "#fdc151",
Bacillales = "#fc953d",
RFN20 = "#fd8035",
Oscillospirales = "#fd8854",
TANB77 = "#f4814d",
Christensenellales = "#c26340",
Lachnospirales = "#c28c5c",
Clostridiales = "#ce9360",
Monoglobales_A = "#ce734f",
Peptostreptococcales = "#c65631",
Monoglobales = "#b93725",
UBA1212 = "#b10f19",
UBA4068 = "#8b1222",
Selenomonadales = "#7a1a12",
Acidaminococcales = "#64121f",
Veillonellales = "#5a0c37")2.5 Wrap working objects
All working objects are wrapped into a single Rdata object to facilitate downstream usage.
save(
# Chicken data
chicken_metadata,
# Bacterial genome data
genome_taxonomy,
genome_stats,
genome_tree,
genome_kegg_paths,
read_counts,
# Transformed data
mag_weighted,
total,
hel,
# Phyloseq objects
phylo_seq,
# Functions
gifts_elements,
gifts_functions,
gifts_domains,
# Colors
taxonomy_colors,
phylum_colors,
order_colors,
sampling_day_colors,
# Define file path
file = "data/data_mg.Rdata")3 Metatranscriptomic data preparation
3.1 Load data
3.1.1 Metatranscriptomic gene expression counts
if(!file.exists("data/gene_expressions.tsv.xz")){
download.file(
url = 'https://sid.erda.dk/share_redirect/Bd8UfDO2D6/gene_expressions.tsv.xz',
destfile = 'data/gene_expressions.tsv.xz', method = 'curl'
)
}
gene_expr <-
read_tsv("data/gene_expressions.tsv.xz") %>%
select(1:129) %>%
rename(mag_name = 'MAG') %>%
relocate(mag_name)
# Chunk analysis to sets of 100 MAGs
mags <- sort(unique(gene_expr$mag_name))
gene_expr_1 <- gene_expr[gene_expr$mag_name %in% mags[c(1:100)],c(2:129)]
gene_expr_2 <- gene_expr[gene_expr$mag_name %in% mags[c(101:200)],c(2:129)]
gene_expr_3 <- gene_expr[gene_expr$mag_name %in% mags[c(201:300)],c(2:129)]
gene_expr_4 <- gene_expr[gene_expr$mag_name %in% mags[c(301:400)],c(2:129)]
gene_expr_5 <- gene_expr[gene_expr$mag_name %in% mags[c(401:500)],c(2:129)]
gene_expr_6 <- gene_expr[gene_expr$mag_name %in% mags[c(501:600)],c(2:129)]
gene_expr_7 <- gene_expr[gene_expr$mag_name %in% mags[c(601:700)],c(2:129)]
gene_expr_8 <- gene_expr[gene_expr$mag_name %in% mags[c(701:825)],c(2:129)]
save(gene_expr_1, file = "data/gene_expr_1.Rdata")
save(gene_expr_2, file = "data/gene_expr_2.Rdata")
save(gene_expr_3, file = "data/gene_expr_3.Rdata")
save(gene_expr_4, file = "data/gene_expr_4.Rdata")
save(gene_expr_5, file = "data/gene_expr_5.Rdata")
save(gene_expr_6, file = "data/gene_expr_6.Rdata")
save(gene_expr_7, file = "data/gene_expr_7.Rdata")
save(gene_expr_8, file = "data/gene_expr_8.Rdata")
rm(list = ls())3.1.2 Distillation
mag_ann <-
read_tsv("data/gene_annotations.tsv.xz") %>%
mutate(gene_length = end_position - start_position)
# Chunk analysis
load("data/gene_expr_1.Rdata")
distq_1 <- distillq(gene_expr_1, mag_ann, GIFT_db2,
genecol = 1, genomecol = 2, annotcol = c(9, 10, 19))
save(distilled_caecum_1, file = "data/distilled_caecum_1.Rdata")
rm(gene_expr_1, distilled_caecum_1)
load("data/gene_expr_2.Rdata")
distq_2 <- distillq(gene_expr_2, mag_ann, GIFT_db2,
genecol = 1, genomecol = 2, annotcol = c(9, 10, 19))
save(distilled_caecum_2, file = "data/distilled_caecum_2.Rdata")
rm(gene_expr_2, distilled_caecum_2)
load("data/gene_expr_3.Rdata")
distq_3 <- distillq(gene_expr_3, mag_ann, GIFT_db2,
genecol = 1, genomecol = 2, annotcol = c(9, 10, 19))
save(distilled_caecum_3, file = "results/tables/distilled_caecum_3.Rdata")
rm(gene_expr_3, distilled_caecum_3)
load("data/gene_expr_4.Rdata")
distq_4 <- distillq(gene_expr_4, mag_ann, GIFT_db2,
genecol = 1, genomecol = 2, annotcol = c(9, 10, 19))
save(distilled_caecum_4, file = "results/tables/distilled_caecum_4.Rdata")
rm(gene_expr_4, distilled_caecum_4)
load("data/gene_expr_5.Rdata")
distq_5 <- distillq(gene_expr_5, mag_ann, GIFT_db2,
genecol = 1, genomecol = 2, annotcol = c(9, 10, 19))
save(distilled_caecum_5, file = "results/tables/distilled_caecum_5.Rdata")
rm(gene_expr_5, distilled_caecum_5)
load("data/gene_expr_6.Rdata")
distq_6 <- distillq(gene_expr_6, mag_ann, GIFT_db2,
genecol = 1, genomecol = 2, annotcol = c(9, 10, 19))
save(distilled_caecum_6, file = "results/tables/distilled_caecum_6.Rdata")
rm(gene_expr_6, distilled_caecum_6)
load("data/gene_expr_7.Rdata")
distq_7 <- distillq(gene_expr_7, mag_ann, GIFT_db2,
genecol = 1, genomecol = 2, annotcol = c(9, 10, 12))
save(distilled_caecum_7, file = "results/tables/distilled_caecum_7.Rdata")
rm(gene_expr_7, distilled_caecum_7)
load("results/tables/gene_expr_8.Rdata")
distq_8 <- distillq(gene_expr_8, mag_ann, GIFT_db2,
genecol = 1, genomecol = 2, annotcol = c(9, 10, 12))
save(distilled_caecum_8, file = "results/tables/distilled_caecum_8.Rdata")
rm(gene_expr_8, distilled_caecum_8)3.1.3 Combine metatranscriptomic GIFT expression counts
load("data/distilled_caecum_1.Rdata")
load("data/distilled_caecum_2.RData")
load("data/distilled_caecum_3.RData")
load("data/distilled_caecum_4.RData")
load("data/distilled_caecum_5.RData")
load("data/distilled_caecum_6.RData")
load("data/distilled_caecum_7.RData")
load("data/distilled_caecum_8.RData")
expr_counts_raw <- c(distilled_expression_caecum_1, distilled_expression_caecum_2,
distilled_expression_caecum_3, distilled_expression_caecum_4,
distilled_expression_caecum_5, distilled_expression_caecum_6,
distilled_expression_caecum_7, distilled_expression_caecum_8)
rm(distilled_expression_caecum_1, distilled_expression_caecum_2,
distilled_expression_caecum_3, distilled_expression_caecum_4,
distilled_expression_caecum_5, distilled_expression_caecum_6,
distilled_expression_caecum_7, distilled_expression_caecum_8)
# Change MAG IDs for a standardised code
ena_to_mag_id <- read_tsv("data/ena_to_mag_id.tsv")
mag_name <- names(expr_counts_raw)
names(expr_counts_raw) <- ena_to_mag_id$mag_id[match(mag_name, ena_to_mag_id$mag_name)]
# Correct animal codes
expr_counts <- lapply(expr_counts_raw, function(x) {
rownames(x) <- gsub("F1a", "", rownames(x))
x
})4 MAG catalogue richness
4.2 Densities of overall relative abundances by phylum
4.2.1 Calculate densities
sum_ind <-
total %>%
rownames_to_column(var = 'genome') %>%
left_join(genome_taxonomy %>% select(genome, phylum), by = 'genome') %>%
mutate(phylum = factor(phylum, levels = unique(names(phylum_colors)))) %>%
pivot_longer(-c(genome, phylum), values_to = 'value', names_to = 'animal_code') %>%
group_by(phylum, animal_code) %>%
summarise(total_ind = sum(value), mean_ind = mean(value))
sum_phylum <-
sum_ind %>%
group_by(phylum) %>%
summarise(total_phylum = sum(total_ind), mean_phylum = mean(mean_ind))4.2.3 Plot curves
The resulting plot corresponds to Figure 1a.
ggplot(count_taxa, aes(x = total_ind, colour = phylum, fill = phylum)) +
geom_density(alpha = 0.5) +
scale_fill_manual(values = phylum_colors) +
scale_color_manual(values = phylum_colors) +
scale_x_log10() +
xlab("Relative abundance (log-transformed)") +
ylab("Density") +
theme_minimal() +
theme(
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_text(size = 8),
axis.title.y = element_text(size = 8),
axis.text.x = element_text(size = 8),
legend.position = "bottom") 4.3 Functional ordination - PCoA with PCA loadings
gift_pcoa <-
gifts_elements %>%
as.data.frame() %>%
vegdist(method="euclidean") %>%
pcoa()
gift_pcoa_rel_eigen <- gift_pcoa$values$Relative_eig[1:10]
# Get genome positions
gift_pcoa_vectors <-
gift_pcoa$vectors %>% # extract vectors
as.data.frame() %>%
select(Axis.1,Axis.2) # keep the first 2 axes
gift_pcoa_eigenvalues <- gift_pcoa$values$Eigenvalues[c(1,2)]
gift_pcoa_gifts <-
cov(gifts_elements, scale(gift_pcoa_vectors)) %*% diag((gift_pcoa_eigenvalues/(nrow(gifts_elements)-1))^(-0.5)) %>%
as.data.frame() %>%
rename(Axis.1 = 1, Axis.2 = 2) %>%
rownames_to_column(var = "label") %>%
mutate(func = substr(label,1,3)) %>%
group_by(func) %>%
summarise(Axis.1 = mean(Axis.1),
Axis.2 = mean(Axis.2)) %>%
rename(label = func) %>%
filter(!label %in% c("S01","S02","S03"))The resulting plot corresponds to Figure 1b.
scale <- 20 # scale for vector loadings
gift_pcoa_vectors %>%
rownames_to_column(var = "genome") %>%
left_join(genome_stats, by = "genome") %>%
left_join(genome_taxonomy, by = "genome") %>%
ggplot() +
# genome positions
scale_color_manual(values = order_colors)+
geom_point(aes(x = Axis.1, y = Axis.2, color = order, size = mag_length),
alpha = 0.9, shape = 16) +
# scale_color_manual(values=phylum_colors) +
scale_size_continuous(range = c(0.05, 4)) +
# loading positions
geom_segment(data = gift_pcoa_gifts,
aes(x = 0, y = 0, xend = Axis.1 * scale, yend = Axis.2 * scale),
arrow = arrow(length = unit(0.2, "cm"),
type = "open",
angle = 25),
linewidth = 0.2,
color = "black") +
# primary and secondary scale adjustments
scale_x_continuous(name = paste0("PCoA1 (",round(gift_pcoa_rel_eigen[1]*100, digits = 2), " %)"),
sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA1")) +
scale_y_continuous(name = paste0("PCoA2 (",round(gift_pcoa_rel_eigen[2]*100, digits = 2), " %)"),
sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA2")) +
geom_label_repel(data = gift_pcoa_gifts,
aes(label = label, x = Axis.1 * scale, y = Axis.2 * scale),
segment.color = 'transparent',
size = 3) +
theme_minimal() +
theme(legend.position = "bottom",
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10),
text = element_text(size = 8))4.3.1 Plot genome length and average MCI gradients
# Preparing data for ploting gradients
overall_mci <-
gifts_elements %>%
as.data.frame() %>%
rownames_to_column(var = "genome") %>%
rowwise() %>%
mutate(overall = mean(c_across(B0101:S0301))) %>%
select(genome, overall)
gift_pcoa_vectors_with_metadata <-
gift_pcoa_vectors %>%
rownames_to_column(var = "genome") %>%
left_join(overall_mci, by = "genome") %>%
left_join(genome_stats %>%
select(genome, mag_length), by = "genome") %>%
left_join(gifts_functions %>%
as.data.frame %>%
rownames_to_column(var = "genome") %>%
select(genome, B04, B07))
scale_factor <- max(gift_pcoa_vectors_with_metadata$mag_length, na.rm = TRUE) /
max(gift_pcoa_vectors_with_metadata$overall, na.rm = TRUE)
# Axis 1 gradients
gift_pcoa_vectors_with_metadata %>%
ggplot(aes(x = Axis.1)) +
geom_smooth(aes(y = mag_length), color = "darkblue") +
geom_smooth(aes(y = overall * scale_factor), color = "darkgreen") +
scale_y_continuous(name = "MAG length",
sec.axis = sec_axis(~ . / scale_factor, name = "Overall MCI")) +
theme_minimal() +
theme(
axis.title.y.left = element_text(color = "darkblue"),
axis.title.y.right = element_text(color = "darkgreen"),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10),
text = element_text(size = 8),
legend.position = "none")4.3.2 Plot SCFA and vitamin biosynthesis gradients
# Axis 2 gradients
scale_factor <- max(gift_pcoa_vectors_with_metadata$B04, na.rm = TRUE) /
max(gift_pcoa_vectors_with_metadata$B07, na.rm = TRUE)
gift_pcoa_vectors_with_metadata %>%
ggplot(aes(x = Axis.2)) +
geom_smooth(aes(y = B04), color = "#ee3237") +
geom_smooth(aes(y = B07 * scale_factor), color = "#a61f5e") +
scale_y_continuous(name = "SCFA biosynthesis",
sec.axis = sec_axis(~ . / scale_factor, name = "Vitamin biosynthesis")) +
theme_minimal() +
theme(
axis.title.y.left = element_text(color = "#ee3237"),
axis.title.y.right = element_text(color = "#a61f5e"),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10),
text = element_text(size = 8),
legend.position = "none")4.4 Functional ordination - interactive
library("plotly")
scale <- 20 # scale for vector loadings
pcoa_interactive <-
gift_pcoa_vectors %>%
rownames_to_column(var = "genome") %>%
left_join(genome_stats, by = "genome") %>%
left_join(genome_taxonomy, by = "genome") %>%
mutate(tooltip = paste(family, "//", genome)) %>%
ggplot() +
# genome positions
scale_color_manual(values = order_colors)+
geom_point(aes(x = Axis.1, y = Axis.2, color = order, size = mag_length, text = tooltip),
alpha = 0.9, shape = 16) +
scale_size_continuous(range = c(0.05,4)) +
# primary and secondary scale adjustments
scale_x_continuous(name = paste0("PCoA1 (",round(gift_pcoa_rel_eigen[1]*100, digits = 2), " %)")) +
scale_y_continuous(name = paste0("PCoA2 (",round(gift_pcoa_rel_eigen[2]*100, digits = 2), " %)")) +
theme_minimal() +
theme(legend.position = "none")
ggplotly(pcoa_interactive, tooltip = "text")4.5 Compare MCI and genome length between taxonomic groups
The resulting plot corresponds to Supplementary Figure S1.
main_phylums <- c("Bacillota_A", "Bacillota", "Bacteroidota",
"Cyanobacteriota","Pseudomonadota", "Actinomycetota",
"Verrucomicrobiota", "Campylobacterota"
)
selected_taxonomy_colors <-
taxonomy_colors %>%
filter(phylum %in% main_phylums)
selected_orders <-
selected_taxonomy_colors %>%
select(order, color_order) %>%
filter(order != "Gastranaerophilales") %>%
filter(order != "Actinomycetales") %>%
filter(order != "Coriobacteriales")
selected_groups <-
selected_taxonomy_colors %>%
select(phylum, color_phylum) %>%
distinct(phylum, color_phylum) %>%
filter(color_phylum != '#c28c5c') %>%
rename(order = 'phylum') %>%
rename(color_order = 'color_phylum') %>%
bind_rows(., selected_orders) %>%
mutate(order = factor(order, levels = c("Campylobacterota",
"Verrucomicrobiota",
"Actinomycetota",
"Enterobacterales",
"RF32",
"Burkholderiales",
"Rs-D84",
"Pseudomonadota",
"Gastranaerophilales",
"Cyanobacteriota",
"Bacteroidales",
"Flavobacteriales",
"Bacteroidota", #
"RF39",
"Lactobacillales",
"ML615J-28",
"Erysipelotrichales",
"Acholeplasmatales",
"Bacillales",
"RFN20",
"Bacillota",
"Oscillospirales",
"TANB77",
"Christensenellales",
"Lachnospirales",
"Clostridiales",
"Monoglobales_A",
"Peptostreptococcales",
"Monoglobales",
"UBA1212",
"Bacillota_A"))) %>%
arrange(order)genome_taxonomy <-
genome_taxonomy %>%
mutate(avg_mci = rowMeans(gifts_elements)) %>%
mutate(length = genome_stats$mag_length)
table_one <-
genome_taxonomy %>%
select(genome, order, avg_mci)
table_two <-
genome_taxonomy %>%
select(genome, phylum, avg_mci) %>%
rename(order = 'phylum') %>%
bind_rows(., table_one) %>%
rename(group = 'order')
border_groups <- c("Bacillota_A", "Bacillota", "Bacteroidota",
"Cyanobacteriota", "Pseudomonadota", "Actinomycetota",
"Verrucomicrobiota", "Campylobacterota")4.5.1 Average MCI by taxonomic phylum and order
table_two %>%
filter(group %in% selected_groups$order) %>%
mutate(group = factor(group, levels = selected_groups$order)) %>%
ggplot() +
geom_jitter(aes(x = group, y = avg_mci),
color = 'grey', alpha = 0.5, width = 0.1) +
geom_violin(aes(x = group, y = avg_mci, fill = group),
color = NA,
size = 0.01) +
theme_minimal() +
xlab("Taxonomic group") +
ylab("Genome average metabolic capacity") +
scale_fill_manual(values = selected_taxonomy_colors$color_order) +
theme(legend.position = 'none',
) +
coord_flip()4.5.2 Genome length by taxonomic phylum and order
table_three <-
genome_taxonomy %>%
select(genome, order, length)
table_four <-
genome_taxonomy %>%
select(genome, phylum, length) %>%
rename(order = 'phylum') %>%
bind_rows(., table_three) %>%
rename(group = 'order')
table_four %>%
filter(group %in% selected_groups$order) %>%
mutate(group = factor(group, levels = selected_groups$order)) %>%
ggplot() +
geom_jitter(aes(x = group, y = length), color = 'grey', alpha = 0.5, width = 0.1) +
geom_violin(aes(x = group, y = length, fill = group),
color = NA,
size = 0.01) +
theme_minimal() +
xlab("Taxonomic group") +
ylab("Mag length") +
scale_fill_manual(values = selected_taxonomy_colors$color_order) +
theme(legend.position = 'none') +
coord_flip()5 Alpha diversity
5.2 Hill numbers - Alpha diversity
# Neutral
neutral <-
mag_weighted %>%
dplyr::select(where(~ !all(. == 0))) %>%
hilldiv(., q = 1) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(neutral = 1) %>%
rownames_to_column(var = "animal_code")
# Phylogenetic
phylogenetic <-
mag_weighted %>%
dplyr::select(where(~ !all(. == 0))) %>%
hilldiv(., q = 1, tree = genome_tree) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(phylogenetic = 1) %>%
rownames_to_column(var = "animal_code")
# Functional KEGG
dist_kegg <-
genome_kegg_paths %>%
column_to_rownames(var = "genome") %>%
traits2dist(., method = "gower")
functional_kegg <-
mag_weighted %>%
dplyr::select(where(~ !all(. == 0))) %>%
hilldiv(., q = 1 , dis = dist_kegg) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(functional_kegg = 1) %>%
rownames_to_column(var = "animal_code") %>%
mutate(functional_kegg = if_else(is.nan(functional_kegg), 1, functional_kegg))
# Sequence depth
log_seq_depth <-
read_counts %>%
column_to_rownames(var = 'genome') %>%
{as.data.frame(log(colSums(.)))} %>%
rename(seq_depth = 'log(colSums(.))') %>%
rownames_to_column(var = 'animal_code')
# Merge all metrics
alpha_div <-
neutral %>%
full_join(phylogenetic, by = 'animal_code') %>%
full_join(functional_kegg, by = 'animal_code') %>%
left_join(chicken_metadata, by = 'animal_code') %>%
left_join(log_seq_depth, by = 'animal_code')5.2.1 Alpha diversity by sampling day
The resulting plot corresponds to Figure 2a.
load("data/alpha_div.Rdata")
# Neutral
p_n <-
ggplot(alpha_div,
aes(x = sampling_time, y = neutral, group = sampling_time)) +
geom_jitter(aes_string(color = 'sampling_time'),
size = 0.3, width = 0.25, alpha = 0.6) +
geom_boxplot(aes_string(fill = 'sampling_time'),
width = 0.5,
alpha = 0.5,
outlier.color = NA) +
scale_fill_manual(values = c('#E69F00', '#CC6677', '#56B4E9')) +
scale_color_manual(values = c('#E69F00', '#CC6677', '#56B4E9')) +
theme_minimal() +
ylab(NULL) +
xlab(NULL) +
ggtitle("Neutral") +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
axis.title.x = element_text(size = 8),
axis.title.y = element_text(size = 8),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
plot.title = element_text(size = 10),
legend.position = "none")
# Phylogenetic
p_p <-
ggplot(alpha_div,
aes(x = sampling_time, y = phylogenetic, group = sampling_time)) +
geom_jitter(aes_string(colour = 'sampling_time'),
size = 0.3, width = 0.25, alpha = 0.6) +
geom_boxplot(aes_string(fill = 'sampling_time'),
width = 0.5,
alpha = 0.5,
outlier.color = NA) +
scale_fill_manual(values = c('#E69F00', '#CC6677', '#56B4E9')) +
scale_color_manual(values = c('#E69F00', '#CC6677', '#56B4E9')) +
theme_minimal() +
labs(x = NULL, y = NULL) +
ggtitle("Phylogenetic") +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
axis.title.x = element_text(size = 8),
axis.title.y = element_text(size = 8),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
plot.title = element_text(size = 10),
legend.position = 'none')
# Functional KEGGs
p_f_kegg <-
ggplot(alpha_div,
aes(x = sampling_time, y = functional_kegg, group = sampling_time)) +
geom_jitter(aes_string(colour = 'sampling_time'),
size = 0.3, width = 0.25, alpha = 0.6) +
geom_boxplot(aes_string(fill = 'sampling_time'),
width = 0.5,
alpha = 0.5,
outlier.color = NA) +
scale_fill_manual(values = c("#E69F00", "#CC6677", "#56B4E9")) +
scale_color_manual(values = c("#E69F00", "#CC6677", "#56B4E9")) +
theme_minimal() +
labs(x =NULL, y = NULL) +
ggtitle("Functional") +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
axis.title.x = element_text(size = 8),
axis.title.y = element_text(size = 8),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
plot.title = element_text(size = 10),
legend.position = "none")
grid.arrange(p_n, p_p, p_f_kegg, ncol = 3)5.3 Temporal development
The resulting tables correspond to Supplementary Table S2.
5.3.1 Neutral
m_div_n <-
lme(neutral ~ seq_depth + sex + breed + treatment + trial * age,
random = ~1|pen,
data = alpha_div)
anova(m_div_n) numDF denDF F-value p-value
(Intercept) 1 337 2131.3662 <.0001
seq_depth 1 337 4.1298 0.0429
sex 1 42 1.7688 0.1907
breed 1 42 0.2047 0.6533
treatment 2 42 1.4325 0.2501
trial 1 42 2.1839 0.1469
age 1 337 44.1644 <.0001
trial:age 1 337 0.7599 0.3840
5.3.2 Phylogenetic
m_div_p <-
lme(phylogenetic ~ seq_depth + sex + breed + treatment + trial * age,
random = ~ 1|pen,
data = alpha_div)
anova(m_div_p) numDF denDF F-value p-value
(Intercept) 1 337 22644.853 <.0001
seq_depth 1 337 0.413 0.5210
sex 1 42 4.093 0.0495
breed 1 42 11.799 0.0013
treatment 2 42 1.168 0.3210
trial 1 42 3.901 0.0549
age 1 337 1185.345 <.0001
trial:age 1 337 9.772 0.0019
5.3.3 Functional KEGG
m_div_f_kegg <-
lme(functional_kegg ~ seq_depth + sex + breed + treatment + trial * age,
random = ~ 1|pen,
data = alpha_div)
anova(m_div_f_kegg) numDF denDF F-value p-value
(Intercept) 1 337 307927.81 <.0001
seq_depth 1 337 0.20 0.6535
sex 1 42 0.79 0.3791
breed 1 42 3.00 0.0907
treatment 2 42 1.23 0.3016
trial 1 42 2.52 0.1203
age 1 337 406.41 <.0001
trial:age 1 337 1.17 0.2799
6 Bacterial community composition
6.2 Hill numbers - beta diversity
# Neutral
beta_q1n <-
mag_weighted %>%
filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
dplyr::select_if(~!all(. == 0)) %>%
hillpair(., q = 1, metric = "S", out = "pair")
# Phylogenetic
beta_q1p <-
mag_weighted %>%
filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
dplyr::select_if(~!all(. == 0)) %>%
hillpair(., q = 1, tree = genome_tree, metric = "S", out = "pair")
# Functional KEGG
dist_kegg <-
genome_kegg_paths %>%
column_to_rownames(var = "genome") %>%
traits2dist(., method = "gower")
beta_q1f_kegg <-
mag_weighted %>%
filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
dplyr::select_if(~!all(. == 0)) %>%
hillpair(., q = 1, dist = dist_kegg, metric = "S", out = "pair")6.2.1 Compare individuals from same trial and pen between sampling days
The resulting stats correspond to Supplementary Table S3.
load("data/beta_div.Rdata")
metadata_2 <-
chicken_metadata %>%
dplyr::rename(second = 'animal_code',
sampling_time_2 = 'sampling_time',
pen_2 = 'pen',
trial_2 = 'trial')
# Neutral
betadiv_n_dis <-
beta_q1n %>%
rename(animal_code = 'first') %>%
left_join(chicken_metadata, by = 'animal_code') %>%
left_join(metadata_2, by = 'second') %>%
mutate(diff = case_when(
sampling_time == '7' & sampling_time_2 == '21' ~ '7_21',
sampling_time == '21' & sampling_time_2 == '35' ~ '21_35',
sampling_time == '7' & sampling_time_2 == '35' ~ '7_35')) %>%
mutate(diff_trial = case_when(trial == 'CA' & trial_2 == 'CA' ~ 'CA',
trial == 'CB' & trial_2 == 'CB' ~ 'CB')) %>%
mutate(diff_pen = case_when(pen == pen_2 ~ 'same')) %>%
drop_na(diff, diff_trial, diff_pen)
# Phylogenetic
betadiv_p_dis <-
beta_q1p %>%
rename(animal_code = 'first') %>%
left_join(chicken_metadata, by = 'animal_code') %>%
left_join(metadata_2, by = 'second') %>%
mutate(diff = case_when(
sampling_time == '7' & sampling_time_2 == '21' ~ '7_21',
sampling_time == '21' & sampling_time_2 == '35' ~ '21_35',
sampling_time == '7' & sampling_time_2 == '35' ~ '7_35')) %>%
mutate(diff_trial = case_when(trial == 'CA' & trial_2 == 'CA' ~ 'CA',
trial == 'CB' & trial_2 == 'CB' ~ 'CB')) %>%
mutate(diff_pen = case_when(pen == pen_2 ~ 'same')) %>%
drop_na(diff, diff_trial, diff_pen)
# Functional KEGG
betadiv_f_dis_kegg <-
beta_q1f_kegg %>%
rename(animal_code = 'first') %>%
left_join(chicken_metadata, by = 'animal_code') %>%
left_join(metadata_2, by = 'second') %>%
mutate(diff = case_when(
sampling_time == '7' & sampling_time_2 == '21' ~ '7_21',
sampling_time == '21' & sampling_time_2 == '35' ~ '21_35',
sampling_time == '7' & sampling_time_2 == '35' ~ '7_35')) %>%
mutate(diff_trial = case_when(trial == 'CA' & trial_2 == 'CA' ~ 'CA',
trial == 'CB' & trial_2 == 'CB' ~ 'CB')) %>%
mutate(diff_pen = case_when(pen == pen_2 ~ 'same')) %>%
drop_na(diff, diff_trial, diff_pen)# A tibble: 3 x 3
diff mean sd
<chr> <dbl> <dbl>
1 21_35 0.458 0.100
2 7_21 0.542 0.0907
3 7_35 0.597 0.0917
# A tibble: 3 x 3
diff mean sd
<chr> <dbl> <dbl>
1 21_35 0.168 0.0776
2 7_21 0.238 0.0883
3 7_35 0.297 0.103
# A tibble: 3 x 3
diff mean sd
<chr> <dbl> <dbl>
1 21_35 0.205 0.246
2 7_21 0.252 0.291
3 7_35 0.227 0.262
6.2.2 Plot difference between sampling points for beta diversity
The resulting plot corresponds to Supplementary Figure S3.
# Neutral
p_n <-
betadiv_n_dis %>%
filter(!diff == '7_35') %>%
mutate(diff = factor(diff, levels = c('7_21', '21_35'))) %>%
ggplot(aes(x = diff, y = S, fill = diff)) +
geom_boxplot() +
ylim(0,0.5) +
theme_minimal() +
theme(legend.position = 'none') +
ggtitle('Neutral')
# Phylogenetic
p_p <-
betadiv_p_dis %>%
filter(!diff == '7_35') %>%
mutate(diff = factor(diff, levels = c('7_21', '21_35'))) %>%
ggplot(aes(x = diff, y = S, fill = diff)) +
geom_boxplot() +
ylim(0,0.5) +
theme_minimal() +
theme(legend.position = 'none') +
ggtitle('Phylogenetic')
# Functional KEGG
p_f_kegg <-
betadiv_f_dis_kegg %>%
filter(!diff == '7_35') %>%
mutate(diff = factor(diff, levels = c('7_21', '21_35'))) %>%
ggplot(aes(x = diff, y = S, fill = diff)) +
geom_boxplot() +
ylim(0,0.5) +
theme_minimal() +
theme(legend.position = 'none') +
ggtitle('Functional')
grid.arrange(p_n, p_p, p_f_kegg, ncol = 3)6.3 Effect of design in microbiome composition
6.3.1 Permanova values for different metrics of beta diversity
The resulting stats correspond to Supplementary Table S3.
# Dissimilarity tables
perm <- how(nperm = 999)
setBlocks(perm) <- with(chicken_metadata, pen)
# Neutral
b1n_dis_table <-
beta_q1n %>%
bind_rows(beta_q1n %>% rename(first = second, second = first)) %>%
pivot_wider(names_from = second, values_from = S) %>%
column_to_rownames('first') %>%
mutate(across(everything(), ~replace(., is.na(.), 0))) %>%
as.dist()
adonis2(b1n_dis_table ~ trial + sex + breed + treatment + trial + trial * age,
permutations = perm,
data = chicken_metadata,
by = "term")Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Blocks: with(chicken_metadata, pen)
Permutation: free
Number of permutations: 999
adonis2(formula = b1n_dis_table ~ trial + sex + breed + treatment + trial + trial * age, data = chicken_metadata, permutations = perm, by = "term")
Df SumOfSqs R2 F Pr(>F)
trial 1 2.860 0.05468 26.0870 0.001 ***
sex 1 0.492 0.00941 4.4878 0.001 ***
breed 1 0.905 0.01731 8.2583 0.001 ***
treatment 2 0.857 0.01638 3.9066 0.001 ***
age 1 4.137 0.07909 37.7309 0.001 ***
trial:age 1 1.393 0.02663 12.7057 0.001 ***
Residual 380 41.665 0.79651
Total 387 52.310 1.00000
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Phylogenetic
b1p_dis_table <-
beta_q1p %>%
bind_rows(beta_q1n %>% rename(first = second, second = first)) %>%
pivot_wider(names_from = second, values_from = S) %>%
column_to_rownames('first') %>%
mutate(across(everything(), ~replace(., is.na(.), 0))) %>%
as.dist()
adonis2(b1p_dis_table ~ trial + sex + breed + treatment + trial + trial * age,
permutations = perm,
data = chicken_metadata,
by = "term")Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Blocks: with(chicken_metadata, pen)
Permutation: free
Number of permutations: 999
adonis2(formula = b1p_dis_table ~ trial + sex + breed + treatment + trial + trial * age, data = chicken_metadata, permutations = perm, by = "term")
Df SumOfSqs R2 F Pr(>F)
trial 1 2.860 0.05468 26.0870 0.001 ***
sex 1 0.492 0.00941 4.4878 0.001 ***
breed 1 0.905 0.01731 8.2583 0.001 ***
treatment 2 0.857 0.01638 3.9066 0.001 ***
age 1 4.137 0.07909 37.7309 0.001 ***
trial:age 1 1.393 0.02663 12.7057 0.001 ***
Residual 380 41.665 0.79651
Total 387 52.310 1.00000
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Functional KEGG
b1f_dis_table_kegg <-
beta_q1f_kegg %>%
bind_rows(beta_q1n %>% rename(first = second, second = first)) %>%
pivot_wider(names_from = second, values_from = S) %>%
column_to_rownames('first') %>%
mutate(across(everything(), ~replace(., is.na(.), 0))) %>%
as.dist()
adonis2(b1f_dis_table_kegg ~ trial + sex + breed + treatment + trial + trial * age,
permutations = perm,
data = chicken_metadata,
by = "term")Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Blocks: with(chicken_metadata, pen)
Permutation: free
Number of permutations: 999
adonis2(formula = b1f_dis_table_kegg ~ trial + sex + breed + treatment + trial + trial * age, data = chicken_metadata, permutations = perm, by = "term")
Df SumOfSqs R2 F Pr(>F)
trial 1 2.860 0.05468 26.0870 0.001 ***
sex 1 0.492 0.00941 4.4878 0.001 ***
breed 1 0.905 0.01731 8.2583 0.001 ***
treatment 2 0.857 0.01638 3.9066 0.001 ***
age 1 4.137 0.07909 37.7309 0.001 ***
trial:age 1 1.393 0.02663 12.7057 0.001 ***
Residual 380 41.665 0.79651
Total 387 52.310 1.00000
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
6.3.2 Community composition
perm <- how(nperm = 999)
setBlocks(perm) <- with(chicken_metadata, pen)
t_hel <-
hel %>%
t() %>%
as.data.frame()
adonis2(t_hel ~ trial * sampling_time +
breed * sampling_time +
sex * sampling_time +
treatment * sampling_time,
permutations = perm,
data = chicken_metadata,
by = "term")Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Blocks: with(chicken_metadata, pen)
Permutation: free
Number of permutations: 999
adonis2(formula = t_hel ~ trial * sampling_time + breed * sampling_time + sex * sampling_time + treatment * sampling_time, data = chicken_metadata, permutations = perm, by = "term")
Df SumOfSqs R2 F Pr(>F)
trial 1 0.7573 0.02939 16.6760 0.001 ***
sampling_time 2 6.3129 0.24504 69.5085 0.001 ***
breed 1 0.2590 0.01005 5.7035 0.001 ***
sex 1 0.0971 0.00377 2.1387 0.001 ***
treatment 2 0.1423 0.00552 1.5664 0.001 ***
trial:sampling_time 2 0.8781 0.03408 9.6680 0.001 ***
sampling_time:breed 2 0.1629 0.00632 1.7937 0.026 *
sampling_time:sex 2 0.1321 0.00513 1.4548 0.086 .
sampling_time:treatment 4 0.2188 0.00849 1.2047 0.174
Residual 370 16.8020 0.65219
Total 387 25.7625 1.00000
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(t_hel ~ trial * sampling_time + breed + sex + treatment,
permutations = perm,
data = chicken_metadata,
by = "term")Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Blocks: with(chicken_metadata, pen)
Permutation: free
Number of permutations: 999
adonis2(formula = t_hel ~ trial * sampling_time + breed + sex + treatment, data = chicken_metadata, permutations = perm, by = "term")
Df SumOfSqs R2 F Pr(>F)
trial 1 0.7573 0.02939 16.5310 0.001 ***
sampling_time 2 6.3129 0.24504 68.9041 0.001 ***
breed 1 0.2590 0.01005 5.6539 0.001 ***
sex 1 0.0971 0.00377 2.1201 0.001 ***
treatment 2 0.1423 0.00552 1.5528 0.001 ***
trial:sampling_time 2 0.8781 0.03408 9.5839 0.001 ***
Residual 378 17.3159 0.67214
Total 387 25.7625 1.00000
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
6.4 Community composition development
6.4.1 Distance-based RDA
The resulting plot corresponds to Figure 2b in the manuscript.
set.seed(4)
hel_bray <- vegdist(t(hel), method = 'bray')
hel_bray_sqrt <- sqrt(hel_bray)
pcoa <- cmdscale(hel_bray_sqrt, k = ncol(hel) - 1, eig = TRUE)
pcoa_scores <- pcoa$points
db_rda <- rda(pcoa_scores ~ sampling_time, data = chicken_metadata)
# Stats
anova(db_rda, by = 'term')Permutation test for rda under reduced model
Terms added sequentially (first to last)
Permutation: free
Number of permutations: 999
Model: rda(formula = pcoa_scores ~ sampling_time, data = chicken_metadata)
Df Variance F Pr(>F)
sampling_time 2 0.022776 27.979 0.001 ***
Residual 385 0.156701
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
$r.squared
[1] 0.1269005
$adj.r.squared
[1] 0.1223649
db_rda_scores <-
data.frame(
scores(db_rda, display = 'wa'),
pen = chicken_metadata$pen,
time = chicken_metadata$sampling_time
)
db_rda_scores %>%
ggplot(aes(x = RDA1, y = RDA2, colour = time)) +
geom_point(size = 3) +
scale_color_manual(values = c('#e6a024', '#cc6777', '#5bb4e5')) +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10),
axis.title = element_text(size = 8),
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank())7 Bacterial temporal trends
7.2 Functional ordination - Temporal PCoA with PCA loadings
The resulting plot corresponds to Figure 2c.
gift_pcoa <-
gifts_elements %>%
as.data.frame() %>%
vegdist(method="euclidean") %>%
pcoa()
gift_pcoa_rel_eigen <- gift_pcoa$values$Relative_eig[1:10]
# Get genome positions
gift_pcoa_vectors <-
gift_pcoa$vectors %>% #extract vectors
as.data.frame() %>%
select(Axis.1,Axis.2) # keep the first 2 axes
mag_weighted %>%
rownames_to_column(var = "genome") %>%
mutate_at(vars(-genome),~./sum(.)) %>%
pivot_longer(-genome, names_to = "animal_code", values_to = "count") %>%
left_join(chicken_metadata, by = join_by(animal_code == animal_code)) %>%
filter(!is.na(sampling_time)) %>%
group_by(genome, trial, sampling_time) %>%
summarise(count = mean(count)) %>%
left_join(gift_pcoa_vectors %>% rownames_to_column(var = "genome"), by = "genome") %>%
left_join(genome_taxonomy %>% select(genome, phylum, order), by = "genome") %>%
ggplot() +
theme_minimal() +
#genome positions
geom_point(aes(x = Axis.1, y = Axis.2, color = order, size = count),
alpha = 0.8, shape = 16) +
# colors
scale_color_manual(values = order_colors) +
# scale_color_manual(values=phylum_colors) +
scale_size_continuous(range = c(1,10)) +
# scale adjustments
scale_x_continuous(name = paste0("PCoA1 (",round(gift_pcoa_rel_eigen[1]*100, digits = 2), " %)")) +
scale_y_continuous(name = paste0("PCoA2 (",round(gift_pcoa_rel_eigen[2]*100, digits = 2), " %)")) +
facet_grid(trial ~ sampling_time) +
theme(legend.position = "none",
axis.line = element_line(color = "grey30"),
panel.border = element_rect(color = "grey80", fill = NA),
axis.ticks = element_line(color = "grey30"),
panel.grid = element_line(color = "grey80"),
strip.background = element_rect(fill = "grey80", color = NA))7.3 Compositional barplots
7.3.1 Compositional barplot at order level
physeq_comp_rare_order <-
aggregate_rare(
phylo_seq,
level = "order",
detection = 0.01 / 100,
prevalence = 50 / 100,
include.lowest = TRUE
)
phy_order_df <-
psmelt(physeq_comp_rare_order) %>%
select(OTU, Sample, trial_age, Abundance) %>%
rename(order = 'OTU') %>%
separate_wider_delim(trial_age, "_", names = c("trial", "sampling_time"))# separating by sampling time and trial
phy_order_df %>%
mutate(order = factor(order,
levels = c('Veillonellales', 'Acidaminococcales',
'Selenomonadales', 'UBA4068', 'UBA1212',
'Monoglobales', 'Peptostreptococcales',
'Monoglobales_A', 'Clostridiales',
'Lachnospirales', 'Christensenellales',
'TANB77', 'Oscillospirales', 'RFN20',
'Bacillales','Acholeplasmatales',
'Erysipelotrichales', 'ML615J-28',
'Lactobacillales','RF39','Deferribacterales',
'Actinomycetales', 'Coriobacteriales',
'Gastranaerophilales','Flavobacteriales',
'Bacteroidales', 'Rs-D84', 'Burkholderiales',
'RF32','Enterobacterales',
'Desulfovibrionales', 'Verrucomicrobiales',
'Opitutales','Victivallales',
'Methanomassiliicoccales', 'Synergistales',
'Campylobacterales','Saccharimonadales',
'Methanobacteriales', 'Methanomicrobiales'))) %>%
mutate(sampling_time = str_replace(sampling_time,
pattern = "7",
replacement = "Day 7"),
sampling_time = str_replace(sampling_time,
pattern = "21",
replacement = "Day 21"),
sampling_time = str_replace(sampling_time,
pattern = "35",
replacement = "Day 35")) %>%
mutate(sampling_time = factor(sampling_time,
levels = c("Day 7", "Day 21", "Day 35"))) %>%
ggplot(aes(x = Abundance, y = Sample,
fill = order, group = order)) +
geom_bar(stat = "identity", position = "stack", width = 1,
color ="darkgrey", size = 0.005) +
facet_nested(sampling_time * trial ~., scales = "free_y") +
scale_color_manual('Order', values = order_colors) +
scale_fill_manual('Order', values = order_colors) +
ylab('Relative abundance') +
theme(axis.text.y = element_blank(),
axis.title.y = element_blank(),
axis.ticks.y = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.spacing.x = unit(0.1, "cm"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.text = element_text(size = 8),
legend.title = element_text(size = 10),
strip.text = element_text(size = 8),
legend.position = "right") +
guides(fill = guide_legend(ncol = 1))8 Temporal HMSC setup
HMSC models are computationally intensive. It is advisable to run this script on a server. The code is made to be reproduced on any supercomputer.
8.1 Load MG data
8.1.2 Generate input objects
# Load data
load("data/data_mg.Rdata")
# Metadata summary
design <-
chicken_metadata[, c('animal_code', 'trial', 'pen', 'age','breed',
'sex', 'treatment', 'chicken_body_weight')] %>%
column_to_rownames("animal_code")
design$log_seq_depth <- log(colSums(read_counts %>% column_to_rownames("genome")))
design$animal_code <- rownames(design)
for (i in 1:ncol(design)) {
if (is.character(design[,i])) {
design[,i] <- factor(design[,i])
}
}8.2 Prepate data for HMSC
8.2.1 Define tables
# Genome count table (quantitative community data)
YData <- log1p(mag_weighted %>%
t() %>%
as.data.frame())
# Fixed effects data (explanatory variables)
XData <- design
mean(rownames(YData) == rownames(XData)) # Ydata and XData in the same order
# Genome phylogeny
PData <- genome_tree
YData <- YData[,colnames(YData) %in% genome_tree$tip.label] # Filter missing MAGs in tree8.3 Define formula
# Define XFormula
XFormula_Time <- ~log_seq_depth + trial + sex + breed + treatment + age
# Study Design
StudyDesign <- design[,c(2,9)]
rL.Pen <- HmscRandomLevel(units = levels(StudyDesign$pen))8.3.2 Define MCMC characteristics
## How often to sample the MCM
samples_list = c(5, 250, 250)
# The number of MCMC steps between each recording sample
thin_list = c(1,1,10)
nst = length(thin_list)
# The number of MCMC chains ot use
nChains = 4
# Load only the unfitted models
load(file.path(ModelDir, "unfitted_model.Rdata"))
unf_model <- m8.4 Fit model
set.seed(1)
for (Lst in 1:length(samples_list)) {
thin = thin_list[Lst]
samples = samples_list[Lst]
# Fit the model
m = sampleMcmc(unf_model,
samples = samples,
thin = thin,
adaptNf = rep(ceiling(0.4*samples*thin), unf_model$nr),
transient = ceiling(0.5*samples*thin),
nChains = nChains,
nParallel = nChains)
# Create file name
filename = paste("temporal_model",
"_thin_", as.character(thin),
"_samples_", as.character(samples),
"_chains_", as.character(nChains),
".Rdata", sep = "")
# Save file
save(m, file = file.path(ModelDir, filename))
}8.5 Evaluate convergence
Figure not added to Supplementary.
set.seed(1)
for (i in 1) {
ma = NULL
na = NULL
ma_rho = NULL
na_rho = NULL
for (Lst in 1:nst) {
thin = thin_list[Lst]
samples = samples_list[Lst]
filename = paste("temporal_model","_thin_", as.character(thin),
"_samples_", as.character(samples),
"_chains_", as.character(nChains),
".Rdata", sep = "")
load(file = file.path(ModelDir, filename))
mpost = convertToCodaObject(m,
spNamesNumbers = c(T,F),
covNamesNumbers = c(T,F))
## Beta - Species niches - response of MAGs to the fixed effects
psrf.beta = gelman.diag(mpost$Beta,multivariate = FALSE)$psrf
## Rho - Influence of phylogeny on species niches - phylogenetic signal
psrf.rho = gelman.diag(mpost$Rho,multivariate = FALSE)$psrf
## Beta
if (is.null(ma)) {
ma = psrf.beta[,1]
na = paste0("temporal_model_", as.character(thin),
",", as.character(samples))
} else {
ma = cbind(ma,psrf.beta[,1])
na = c(na,paste0("temporal_model_", as.character(thin),
",", as.character(samples)))
}
## Rho
if (is.null(ma_rho)) {
ma_rho = psrf.rho[,1]
na_rho = paste0("temporal_model_", as.character(thin),
",", as.character(samples))
} else {
ma_rho = cbind(ma_rho,psrf.rho[,1])
na_rho = c(na_rho,paste0("temporal_model_", as.character(thin),
",", as.character(samples)))
}
}
# Create file name
panel.name = paste("MCMC_convergence",
"temporal_model_", as.character(i),
".pdf", sep = "")
# Plot
pdf(file = file.path(PanelDir, panel.name))
## Beta
par(mfrow = c(2,1))
vioplot(ma,
col = rainbow_hcl(1),
names = na,
ylim = c(0, max(ma)),
main = "psrf(beta)")
vioplot(ma,
col = rainbow_hcl(1),
names = na,
ylim = c(0.9,1.1),
main = "psrf(beta)")
## Rho
par(mfrow = c(2,1))
vioplot(ma_rho,
col = rainbow_hcl(1),
names = na_rho,
ylim = c(0, max(ma_rho)),
main = "psrf(rho)")
vioplot(ma_rho,
col = rainbow_hcl(1),
names = na_rho,
ylim = c(0.9,1.1),
main = "psrf(rho)")
dev.off()
}9 HMSC analysis
9.1 Load model
# Directory
localDir = "."
ModelDir = file.path(localDir, "temporal_hmsc/models")
MFDir = file.path(localDir, "temporal_hmsc/model_fit")
# Load model
if(!file.exists("temporal_hmsc/models/temporal_model_thin_10_samples_250_chains_4.Rdata")){
download.file(
url = 'https://sid.erda.dk/share_redirect/Bd8UfDO2D6/temporal_model_thin_10_samples_250_chains_4.Rdata',
destfile = 'temporal_hmsc/models/temporal_model_thin_10_samples_250_chains_4.Rdata', method = 'curl'
)
}
load(file = file.path(ModelDir, 'temporal_model_thin_10_samples_250_chains_4.Rdata'))9.2 Cross-validation
The cross-validation part is also computationally intense. Make sure you have the right capacity for it.
# Create partitions
set.seed(12)
partition_1 <- createPartition(hM = m, nfolds = 2)
partition_2 <- c(rep(1,sum(m$XData$trial == "CA")),
rep(2,sum(m$XData$trial == "CB")))
set.seed(1)
predY.MF <- computePredictedValues(m, expected = TRUE)
MF <- evaluateModelFit(hM = m, predY = predY.MF)
filename <- file.path(MFDir, paste("MF_chains_4_thin_10_samples_250.rds"))
saveRDS(MF, file = filename)
MF <- readRDS(file = filename)
mean(MF$R2)9.2.1 Model fit using 2-fold CV: samples randomly assigned to folds
set.seed(1)
predY.CV_1 <- computePredictedValues(m,
expected = TRUE,
partition = partition_1,
nChains = 1,
nParallel = 1)
MF.CV_1 <- evaluateModelFit(hM = m, predY = predY.CV_1)
# create file name and save
filename <- file.path(MFDir, paste("MF_CV1_chains_4_thin_10_samples_250.rds"))
saveRDS(MF.CV_1,file = filename)
MF_CV_1 <- readRDS(file = filename)
mean(MF_CV_1$R2)9.2.2 Model fit using 2-fold CV: each fold contains the samples from one trial
set.seed(1)
predY.CV_2 <- computePredictedValues(m,
expected = TRUE,
partition = partition_2,
nChains = 1,
nParallel = 1)
MF.CV_2 <- evaluateModelFit(hM = m, predY = predY.CV_2)
# create file name and save
filename <- file.path(MFDir, paste("MF_CV2_chains_4_thin_10_samples_250.rds"))
saveRDS(MF.CV_2, file = filename)
MF_CV_2 <- readRDS(file = filename)
mean(MF_CV_2$R2)9.3 Functional structure
beta_post <- getPostEstimate(m, parName = "Beta")
plotBeta(m, beta_post, supportLevel = 0.95, plotTree = TRUE)
Gradient_age <- constructGradient(m,
focalVariable = "age",
non.focalVariables = 1)
predY_age <- predict(m, Gradient = Gradient_age, expected = TRUE)
# Example using cmag_376
plotGradient(m,
Gradient_age,
pred = predY_age,
yshow = 0,
measure = "Y",
index = 376,
showData = TRUE)9.4 Bacterial temporal trends table
beta_post <- getPostEstimate(m, parName = "Beta")
# Increasing MAGs
increasers <- colnames(beta_post$support)[beta_post$support[8,] > 0.95]
increasers_df <- data.frame(mag_id = increasers,
parameter = beta_post$mean[8,colnames(m$Y) %in% increasers])
increasers_df$hmsc_trend <- "increaser"
# Decreasing MAGs
decreasers <- colnames(beta_post$support)[beta_post$support[8,] < 0.05]
decreasers_df <- data.frame(mag_id = decreasers,
parameter = beta_post$mean[8,colnames(m$Y) %in% decreasers])
decreasers_df$hmsc_trend <- "decreaser"
trends <-
colnames(beta_post$support) %>%
as.data.frame() %>%
dplyr::rename(mag_id = '.') %>%
left_join(rbind(increasers_df, decreasers_df), by = 'mag_id') %>%
rename(genome = 'mag_id') %>%
replace_na(list(hmsc_trend = 'stable')) %>%
write_tsv("data/hmsc_mag_trend.tsv")9.5 Correlate bacterial MCI and response to time
The resulting plot corresponds to Figure 2e.
mci_df <-
gifts_elements %>%
as.data.frame() %>%
mutate(avg_mci = rowMeans(.)) %>%
select(avg_mci) %>%
rownames_to_column(var = 'genome')
trends %>%
left_join(genome_taxonomy %>% select(genome, order), by = 'genome') %>%
left_join(mci_df, by = 'genome') %>%
ggplot() +
geom_point(aes(x = parameter, y = avg_mci, color = order), size = 1) +
scale_color_manual(values = order_colors) +
geom_smooth(aes(x = parameter, y = avg_mci),
method = 'lm', linewidth = 0.3) +
theme_minimal() +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
axis.title.x = element_text(size = 8),
axis.title.y = element_text(size = 8),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
legend.position = 'none')9.7 Phylogenetic correlogram
The resulting plot corresponds to Figure 2d.
9.7.1 Calcultate phylogenetic distance
pw_phylo_dist <- cophenetic.phylo(genome_tree)
xy <- t(combn(colnames(pw_phylo_dist), 2))
pw_phylo_dist <- data.frame(xy, dist = pw_phylo_dist[xy])
colnames(pw_phylo_dist) <- c('genome.x', 'genome.y', 'distance')
pw_phylo_dist_taxa <-
pw_phylo_dist %>%
inner_join(genome_taxonomy, by = c('genome.x' = 'genome')) %>%
inner_join(genome_taxonomy, by = c('genome.y' = 'genome'))
# Create distance table
tax_table <- c()
for (i in c(1:nrow(pw_phylo_dist_taxa))) {
pair <- pw_phylo_dist_taxa[i,]
#Phylum level
if (!is.na(pair$phylum.x != pair$phylum.y) & pair$phylum.x != pair$phylum.y) {
row <- c('Phylum', pair$distance)
} else {
#Class level
if (!is.na(pair$class.x != pair$class.y) & pair$class.x != pair$class.y) {
row <- c('Class', pair$distance)
} else {
#Order level
if (!is.na(pair$order.x != pair$order.y) & pair$order.x != pair$order.y) {
row <- c('Order', pair$distance)
} else {
#Family level
if (!is.na(pair$family.x != pair$family.y) & pair$family.x != pair$family.y) {
row <- c('Family', pair$distance)
} else {
# Genus
if (!is.na(pair$genus.x != pair$genus.y) & pair$genus.x != pair$genus.y) {
row <- c('Genus', pair$distance)
}
}
}
}
}
tax_table <- rbind(tax_table,row)
}
tax_table <-
tax_table %>%
data.frame() %>%
dplyr::rename(taxonomy = 'X1', distance = 'X2') %>%
mutate(distance = as.numeric(distance)) %>%
write_tsv("data/taxonomy_distance.tsv")tax_table <- read_tsv("data/taxonomy_distance.tsv")
tax_table %>%
ggplot(aes(x = distance,
fill = taxonomy,
color = taxonomy,
alpha = 0.1
)) +
geom_density(adjust = 5) +
scale_fill_manual(values = c('#E69F00','#e28cff','#999999','#56B4E9','#99cc00')) +
scale_color_manual(values = c('#E69F00','#e28cff','#999999','#56B4E9','#99cc00')) +
theme_void()9.7.2 Phylogenetic signal
mpost <- convertToCodaObject(m)
quantile(unlist(mpost$Rho), probs = c(.05,.5,.95))
age_parameter <- beta_post$mean[8,]
age_parameter_phyloSorted <-
data.frame(age_parameter = age_parameter[
match(m$phyloTree$tip.label,
names(age_parameter))
])
mean(rownames(age_parameter_phyloSorted) == m$phyloTree$tip.label)
new_tree <- m$phyloTree
new_tree$node.label <- NULL
obj <- phylo4d(new_tree, tip.data = age_parameter_phyloSorted)
barplot.phylo4d(obj)
age.cg <- phyloCorrelogram(obj, trait = "age_parameter")
saveRDS(age.cg,file = "age.cg.rds")
age.cg <- readRDS(file = "age.cg.rds")
plot(age.cg)10 Bacterial community composition
10.2 GIFTs per MAG
10.2.1 Plot GIFTs of all bacteria by element
This plot is not included in Supplementary
gifts_elements %>%
as.data.frame() %>%
rownames_to_column(var = 'genome') %>%
pivot_longer(!genome, names_to = 'Code_element', values_to = 'mci') %>%
inner_join(GIFT_db2, by = 'Code_element') %>%
ggplot(aes(x = genome,
y = Code_element,
fill = mci)) +
geom_tile(color = '#ffffff') +
scale_y_discrete(guide = guide_axis(check.overlap = TRUE)) +
scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
facet_grid(Code_function ~ ., scales = 'free', space = 'free') +
scale_fill_gradientn(colours = rev(terrain.colors(10))) +
theme_void(base_size = 18) +
theme(axis.text.y = element_text(),
legend.position = 'top')10.3 GIFTs per community
10.3.1 Calculate community GIFTs
10.3.2 Plot community MCI
The resulting plot corresponds to Figure 2f in the manuscript.
load("data/mci_com.Rdata")
funcs_com %>%
as.data.frame() %>%
rownames_to_column('animal_code') %>%
rowwise() %>%
mutate(overall = mean(c_across(B01:D09))) %>%
select(animal_code, overall) %>%
left_join(chicken_metadata %>%
select(animal_code, sampling_time),
by = 'animal_code') %>%
ggplot(aes(x = sampling_time,
y = overall,
group = sampling_time)) +
geom_jitter(aes_string(colour = 'sampling_time'), size = 0.3, width = 0.2, alpha = 0.6) +
geom_boxplot(aes_string(fill = 'sampling_time'), width = 0.4, alpha = 0.5, outlier.color = NA) +
scale_fill_manual(values = c('#E69F00', '#CC6677', '#56B4E9')) +
scale_color_manual(values = c('#E69F00', '#CC6677', '#56B4E9')) +
theme_minimal() +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
axis.title.x = element_text(size = 8),
axis.title.y = element_text(size = 8),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
plot.title = element_text(size = 10),
legend.position = 'none')11 Associate microbial community metrics with chicken body weight
11.2 Associate alpha diversity metrics with chicken BW
The resulting plots are not included in Supplementary.
11.3 Phylogenetic
ggplot(alpha_div, aes(x = phylogenetic, y = chicken_body_weight)) +
geom_point() +
geom_smooth(method = "lm") +
facet_wrap(~ sampling_time * trial, scales = 'free')11.4 Functional KEGG
ggplot(alpha_div, aes(x = functional_kegg, y = chicken_body_weight)) +
geom_point() +
geom_smooth(method = "lm") +
facet_wrap(~ sampling_time * trial, scales = 'free')11.5 Linear regressions
11.5.1 Neutral
The resulting plot corresponds to Figure 3a.
div_all_day_35 <-
alpha_div %>%
filter(sampling_time == '35') %>%
filter(trial != 'CC') #%>%
# filter(!animal_code %in% c("CB12.17", "CA04.16", "CA03.17")) # ¿? double check if we are OK filtering some samples, they show out of place diversity
N <- lme(chicken_body_weight ~ age + trial + neutral,
random = ~1|pen,
data = div_all_day_35)
summary(N)Linear mixed-effects model fit by REML
Data: div_all_day_35
AIC BIC logLik
1971.21 1988.686 -979.6049
Random effects:
Formula: ~1 | pen
(Intercept) Residual
StdDev: 146.892 267.3176
Fixed effects: chicken_body_weight ~ age + trial + neutral
Value Std.Error DF t-value p-value
(Intercept) -2377.5282 1033.8704 90 -2.299638 0.0238
age 136.6093 28.5987 90 4.776764 0.0000
trialCB -37.8228 63.4256 46 -0.596333 0.5539
neutral -2.8996 0.8471 90 -3.423055 0.0009
Correlation:
(Intr) age trilCB
age -0.995
trialCB 0.047 -0.091
neutral -0.108 0.020 0.151
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-1.9136097 -0.7156661 -0.0462182 0.5659185 2.7314491
Number of Observations: 140
Number of Groups: 48
11.5.2 Phylogenetic
P <- lme(chicken_body_weight ~ age + trial + phylogenetic,
random = ~1|pen,
data = div_all_day_35)
summary(P)Linear mixed-effects model fit by REML
Data: div_all_day_35
AIC BIC logLik
1971.488 1988.964 -979.7442
Random effects:
Formula: ~1 | pen
(Intercept) Residual
StdDev: 133.6237 280.5324
Fixed effects: chicken_body_weight ~ age + trial + phylogenetic
Value Std.Error DF t-value p-value
(Intercept) -2405.5851 1082.1821 90 -2.222902 0.0287
age 142.6677 29.9857 90 4.757858 0.0000
trialCB 10.8886 62.3303 46 0.174691 0.8621
phylogenetic -64.2194 33.3309 90 -1.926725 0.0572
Correlation:
(Intr) age trilCB
age -0.970
trialCB 0.086 -0.086
phylogenetic -0.137 -0.104 -0.123
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-1.8560595 -0.6281856 -0.0497021 0.6532160 2.7098731
Number of Observations: 140
Number of Groups: 48
11.5.3 Functional KEGG
Q_kegg <- lme(chicken_body_weight ~ age + trial + functional_kegg,
random = ~1|pen,
data = div_all_day_35)
summary(Q_kegg)Linear mixed-effects model fit by REML
Data: div_all_day_35
AIC BIC logLik
1968.771 1986.247 -978.3854
Random effects:
Formula: ~1 | pen
(Intercept) Residual
StdDev: 142.4688 281.7832
Fixed effects: chicken_body_weight ~ age + trial + functional_kegg
Value Std.Error DF t-value p-value
(Intercept) -3102.4077 1382.9000 90 -2.243407 0.0273
age 135.6025 30.3166 90 4.472873 0.0000
trialCB -4.7767 63.6887 46 -0.075001 0.9405
functional_kegg 322.1611 726.2162 90 0.443616 0.6584
Correlation:
(Intr) age trilCB
age -0.687
trialCB 0.063 -0.095
functional_kegg -0.625 -0.137 -0.016
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-2.00634774 -0.63344629 -0.09338556 0.53395793 2.56270649
Number of Observations: 140
Number of Groups: 48
11.6 Associate community MCI with diversity
domains_com_2 <-
funcs_com %>%
as.data.frame(optional = TRUE) %>%
rownames_to_column(var = "animal_code") %>%
rowwise() %>%
mutate(overall_com_mci = mean(c_across(B01:D09))) %>%
select(animal_code, overall_com_mci) %>%
left_join(alpha_div) %>%
filter(sampling_time == '35')
# Neutral
N <- lme(neutral ~ age + trial + overall_com_mci,
random = ~1|pen,
data = domains_com_2)
summary(N)Linear mixed-effects model fit by REML
Data: domains_com_2
AIC BIC logLik
1320.058 1337.534 -654.0289
Random effects:
Formula: ~1 | pen
(Intercept) Residual
StdDev: 14.1856 25.89659
Fixed effects: neutral ~ age + trial + overall_com_mci
Value Std.Error DF t-value p-value
(Intercept) 381.3418 121.82183 90 3.130324 0.0024
age -0.5173 2.76996 90 -0.186768 0.8523
trialCB -9.2723 6.09225 46 -1.521988 0.1349
overall_com_mci -906.4972 254.91596 90 -3.556063 0.0006
Correlation:
(Intr) age trilCB
age -0.807
trialCB 0.107 -0.093
overall_com_mci -0.576 -0.017 -0.095
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-2.3225513 -0.7220818 0.0119940 0.6066901 2.4257749
Number of Observations: 140
Number of Groups: 48
# Phylogenetic
P <- lme(phylogenetic ~ age + trial + overall_com_mci,
random = ~1|pen,
data = domains_com_2)
summary(P)Linear mixed-effects model fit by REML
Data: domains_com_2
AIC BIC logLik
262.8678 280.3437 -125.4339
Random effects:
Formula: ~1 | pen
(Intercept) Residual
StdDev: 0.0001588075 0.5894999
Fixed effects: phylogenetic ~ age + trial + overall_com_mci
Value Std.Error DF t-value p-value
(Intercept) 19.24326 2.656478 90 7.243901 0.0000
age 0.09722 0.060649 90 1.602937 0.1125
trialCB 0.34837 0.101108 46 3.445529 0.0012
overall_com_mci -52.93094 5.272811 90 -10.038468 0.0000
Correlation:
(Intr) age trilCB
age -0.828
trialCB 0.148 -0.121
overall_com_mci -0.572 0.015 -0.120
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-2.54298266 -0.48022804 0.01396494 0.68418701 2.03727482
Number of Observations: 140
Number of Groups: 48
# Functional
Q <- lme(functional_kegg ~ age + trial + overall_com_mci,
random = ~1|pen,
data = domains_com_2)
summary(Q)Linear mixed-effects model fit by REML
Data: domains_com_2
AIC BIC logLik
-518.8687 -501.3928 265.4344
Random effects:
Formula: ~1 | pen
(Intercept) Residual
StdDev: 0.004408918 0.03300953
Fixed effects: functional_kegg ~ age + trial + overall_com_mci
Value Std.Error DF t-value p-value
(Intercept) 1.5837707 0.14937823 90 10.602420 0.0000
age 0.0058861 0.00340933 90 1.726471 0.0877
trialCB 0.0043668 0.00581201 46 0.751348 0.4563
overall_com_mci -1.4165522 0.29806837 90 -4.752441 0.0000
Correlation:
(Intr) age trilCB
age -0.826
trialCB 0.144 -0.119
overall_com_mci -0.572 0.011 -0.117
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-2.3045841 -0.5938668 -0.1627596 0.4672308 3.9317166
Number of Observations: 140
Number of Groups: 48
11.7 Associate community MCI with chicken BW
mci <- lme(chicken_body_weight ~ age + trial + overall_com_mci,
random = ~1|pen,
data = domains_com_2)
summary(mci)Linear mixed-effects model fit by REML
Data: domains_com_2
AIC BIC logLik
1965.926 1983.401 -976.9628
Random effects:
Formula: ~1 | pen
(Intercept) Residual
StdDev: 149.5935 279.2234
Fixed effects: chicken_body_weight ~ age + trial + overall_com_mci
Value Std.Error DF t-value p-value
(Intercept) -2265.3072 1312.0446 90 -1.726547 0.0877
age 138.5372 29.8369 90 4.643157 0.0000
trialCB -0.8276 65.0269 46 -0.012727 0.9899
overall_com_mci -1747.0996 2741.7924 90 -0.637211 0.5256
Correlation:
(Intr) age trilCB
age -0.807
trialCB 0.108 -0.094
overall_com_mci -0.576 -0.016 -0.095
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-1.9808054 -0.6163449 -0.1085188 0.5455396 2.5326459
Number of Observations: 140
Number of Groups: 48
11.8 Associate community weighted genome size with chicken BW
metadata_day_35 <-
chicken_metadata %>%
filter(sampling_time == "35")
mag_lengthed <-
round(sweep(total, MARGIN = 1, genome_stats$mag_length, `*`), 0) %>%
t() %>%
as.data.frame() %>%
rownames_to_column(var = "animal_code") %>%
rowwise() %>%
mutate(comm_length = mean(c_across(2:389))) %>%
select(animal_code, comm_length) %>%
filter(animal_code %in% metadata_day_35$animal_code) %>%
left_join(metadata_day_35, by = 'animal_code')
L <- lme(chicken_body_weight ~ scale(age) + trial + scale(log(comm_length)),
random = ~ 1|pen,
data = mag_lengthed)
summary(L)Linear mixed-effects model fit by REML
Data: mag_lengthed
AIC BIC logLik
1975.692 1993.168 -981.8459
Random effects:
Formula: ~1 | pen
(Intercept) Residual
StdDev: 144.7197 281.2538
Fixed effects: chicken_body_weight ~ scale(age) + trial + scale(log(comm_length))
Value Std.Error DF t-value p-value
(Intercept) 2231.4630 46.49952 90 47.98894 0.0000
scale(age) 114.0616 24.97663 90 4.56673 0.0000
trialCB -1.2398 66.94795 46 -0.01852 0.9853
scale(log(comm_length)) -4.9378 29.89947 90 -0.16515 0.8692
Correlation:
(Intr) scl(g) trilCB
scale(age) 0.088
trialCB -0.728 -0.113
scale(log(comm_length)) 0.211 0.070 -0.291
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-2.0015452 -0.6185148 -0.1001931 0.5667678 2.5817652
Number of Observations: 140
Number of Groups: 48
12 Bacteria vs Chicken BW HMSC setup
HMSC models are computationally intensive. It is advisable to run this script on a server. The code is made to be reproduced on any supercomputer.
12.1 Load MG and MT data
12.1.2 Generate input objects
# Load data
load("data/data_mg.Rdata")
# Metadata summary
design <-
chicken_metadata[, c('animal_code', 'trial', 'pen', 'age','breed',
'sex', 'treatment', 'chicken_body_weight')] %>%
column_to_rownames("animal_code")
design$log_seq_depth <- log(colSums(read_counts %>% column_to_rownames("genome")))
design$animal_code <- rownames(design)
for (i in 1:ncol(design)) {
if (is.character(design[,i])) {
design[,i] <- factor(design[,i])
}
}
mean(rownames(design) == rownames(mag_weighted))
# Filter day 35
design_day35 <-
design %>%
filter(age > 25)
mag_weighted_day35 <-
mag_weighted %>%
rownames_to_column(var = 'genome') %>%
filter(genome %in% rownames(design_day35)) %>%
column_to_rownames(var = 'genome') %>%
t() %>%
as.data.frame()
dim(mag_weighted_day35)
dim(design_day35)
mean(rownames(design_day35) == rownames(mag_weighted_day35))12.2 Prepate data for HMSC
12.2.1 Define tables
# Genome count table (quantitative community data)
YData <- log1p(mag_weighted_day35)
# Fixed effects data (explanatory variables)
XData <- design_day35
mean(rownames(YData) == rownames(XData)) # Ydata and XData in the same order
# Genome phylogeny
PData <- genome_tree
YData <- YData[,colnames(YData) %in% genome_tree$tip.label] # Filter missing MAGs in tree
# Traits
gifts_functions <-
gifts_elements %>%
as.data.frame() %>%
rownames_to_column(var = 'genome') %>%
filter(genome %in% colnames(YData)) %>%
column_to_rownames(var = 'genome')
mean(gifts_functions$mag_id == colnames(YData))
TrData <- data.frame(MCI = TrData)
rownames(TrData) <- colnames(YData)12.2.4 Define MCMC
# Chain characteristics
## How often to sample the MCM
samples_list = c(5, 250, 250)
# The number of MCMC steps between each recording sample
thin_list = c(1,1,10)
# The number of MCMC chains ot use
nChains = 4
# Load only the unfitted models
load(file.path(ModelDir,"unfitted_model.Rdata"))
unf_model <- m12.3 Fit model
set.seed(1)
for (Lst in 1:length(samples_list)) {
thin = thin_list[Lst]
samples = samples_list[Lst]
# fit the model
m = sampleMcmc(unf_model,
samples = samples,
thin = thin,
adaptNf = rep(ceiling(0.4*samples*thin), unf_model$nr),
transient = ceiling(0.5*samples*thin),
nChains = nChains,
nParallel = nChains)
# create file name
filename = paste("bw_model",
"_thin_", as.character(thin),
"_samples_", as.character(samples),
"_chains_", as.character(nChains),
".Rdata", sep = "")
# save file
save(model, file = file.path(ModelDir,filename))
}13 Bacteria vs Chicken BW HMSC analysis
13.1 Load model
13.1.2 Load model
if(!file.exists("hmsc_bw/models/bw_model_thin_10_samples_250_chains_4.Rdata")){
download.file(
url = 'https://sid.erda.dk/share_redirect/Bd8UfDO2D6/bw_model_thin_10_samples_250_chains_4.Rdata',
destfile = 'hmsc_bw/models/bw_model_thin_10_samples_250_chains_4.Rdata', method = 'curl')
}
load(file = file.path(ModelDir,"bw_model_thin_10_samples_250_chains_4.Rdata"))13.5 Create a data table
load("hmsc_bw/model_fit/MF_thin_10_samples_250_chains_4.Rdata")
rel_abu <- colMeans(vegan::decostand(exp(m$Y), method = "total")) * 100
toplot <- data.frame(parameter = bw_param,
bw_support = bw_support,
var = bw_var,
pred = MFCV$R2,
var_pred = bw_var * MFCV$R2)
toplot$MAG <- rownames(toplot)
toplot$rel_abu <- rel_abu
rownames(toplot) <- NULL
write.table(toplot, file = "data/data.txt", row.names = F)13.5.1 Associate bacteria response with chicken BW
not included in Supplementary
toplot <- read.table("data/data.txt", , row.names = F)
ggplot() +
geom_point(data = toplot, aes(x = parameter, y = Pred), alpha = 0.7, shape = 16) +
geom_point(data = toplot[toplot$bw_support > 0.975,],
mapping = aes(x = parameter, y = Pred),
color = "red", shape = 16) +
geom_point(data = toplot[toplot$bw_support < 0.025,],
mapping = aes(x = parameter, y = Pred),
color = "blue", shape = 16) +
geom_vline(xintercept = 0) +
geom_hline(yintercept = mean(toplot$Pred, na.rm = TRUE), linetype = "dashed") +
labs(x = "Parameter estimate", y = "Body weight's importance") +
theme_minimal()13.6 Create table with HMSC results
# Total relative abundance of species associated positively and negatively with body weight (~9%)
sum(toplot$rel_abu[(toplot$bw_support > 0.975 | toplot$bw_support < 0.025) & (toplot$var_pred > mean(toplot$var_pred))])
# Total relative abundance of species associated positively with body weight (<1%)
sum(toplot$rel_abu[(toplot$bw_support > 0.975) & (toplot$var_pred > mean(toplot$var_pred))])
# Total relative abundance of species associated negatively with body weight (>8%)
sum(toplot$rel_abu[(toplot$bw_support < 0.025 | toplot$bw_support < 0.025) & (toplot$var_pred > mean(toplot$var_pred))])
# Phylogenetic signal
mpost <- convertToCodaObject(m)
quantile(unlist(mpost$Rho), probs = c(.05,.5,.95))
bw_parameter <- beta_post$mean[4,]
bw_parameter_phyloSorted <-
data.frame(bw_parameter = bw_parameter[
match(m$phyloTree$tip.label,
names(bw_parameter))])
mean(rownames(bw_parameter_phyloSorted) == m$phyloTree$tip.label)
new_tree <- m$phyloTree
new_tree$node.label <- NULL
obj <- phylo4d(new_tree, tip.data = bw_parameter_phyloSorted)
barplot.phylo4d(obj)
bw.cg <- phyloCorrelogram(obj, trait = "bw_parameter")
saveRDS(bw.cg,file = file.path(PanelDir,"bw.cg.rds"))
bw.cg <- readRDS(file = file.path(PanelDir,"bw.cg.rds"))
plot(bw.cg)
bw_parameter_df <- data.frame(genome = names(bw_parameter), bw_response = bw_parameter)
bw_parameter_df$bw_trend <- NA
bw_parameter_df$bw_trend[beta_post$support[4,] > 0.95] <- "positive"
bw_parameter_df$bw_trend[beta_post$support[4,] < 0.05] <- "negative"
bw_parameter_df$bw_trend[is.na(bw_parameter_df$bw_trend)] <- "neutral"
write_tsv(bw_parameter_df,file = "data/hmsc_bw_trend.tsv")14 Comparing metabolic capacity of positively and negatively associated bacteria
14.1 Load MG data
load("data/data_mg.Rdata")
trends_bw <-
read_tsv("data/hmsc_bw_trend.tsv") %>%
mutate(trend_07 = case_when(bw_support > 0.85 ~ 'positive',
bw_support < 0.15 ~ 'negative',
TRUE ~ 'non-significant')) %>%
mutate(trend_09 = case_when(bw_support > 0.95 ~ 'positive',
bw_support < 0.05 ~ 'negative',
TRUE ~ 'non-significant'))
gifts <-
gifts_elements %>%
as.data.frame() %>%
select(where(~ sum(.) > 0))
gifts_funct <-
gifts_functions %>%
as.data.frame() %>%
select(where(~ sum(.) > 0))
avg_mci <- rowMeans(gifts)
gifts_funct$avg_mci <- avg_mci
avg_mci_bio <- rowMeans(gifts[,grepl("B",names(gifts))])
gifts_funct$avg_mci_bio <- avg_mci_bio
avg_mci_deg <- rowMeans(gifts[,grepl("D",names(gifts))])
gifts_funct$avg_mci_deg <- avg_mci_deg
gift_colors <-
read_tsv("data/gift_colors.tsv") %>%
mutate(legend = str_c(Code_function, " - ", Function))14.2 Taxonomy of positively and negatively associated MAGs
14.2.1 Positive group
All MAGs with positive association belong to RF39
trends_bw %>%
filter(trend_09 == "positive") %>%
select(genome) %>%
left_join(genome_taxonomy, by = 'genome') %>%
select(order) %>%
table()order
RF39
10
14.2.2 Negative group
MAGs with negative association belong to various orders, mainly to Lachnospirales and Oscillospirales
trends_bw %>%
filter(trend_09 == "negative") %>%
select(genome) %>%
left_join(genome_taxonomy, by = 'genome') %>%
select(order) %>%
table() %>%
sort()order
Actinomycetales Campylobacterales Monoglobales_A Opitutales RF39 Saccharimonadales RF32 TANB77
1 1 1 1 1 1 4 4
Coriobacteriales Bacteroidales Erysipelotrichales Lactobacillales Christensenellales Oscillospirales Lachnospirales
5 8 8 12 27 35 81
14.3 Compute MCIs of BW positive and negative associated communities
gifts_funct_bw_pos <-
gifts_funct %>%
mutate(avg_mci_bio = gifts_funct$avg_mci_bio,
avg_mci_deg = gifts_funct$avg_mci_deg,
bw_effect = trends_bw$trend_09) %>%
filter(bw_effect == "positive") %>%
select(-bw_effect)
gifts_funct_bw_neu <-
gifts_funct %>%
mutate(avg_mci_bio = gifts_funct$avg_mci_bio,
avg_mci_deg = gifts_funct$avg_mci_deg,
bw_effect = trends_bw$trend_09) %>%
filter(bw_effect == "non-significant") %>%
select(-bw_effect)
gifts_funct_bw_neg <-
gifts_funct %>%
mutate(avg_mci_bio = gifts_funct$avg_mci_bio,
avg_mci_deg = gifts_funct$avg_mci_deg,
bw_effect = trends_bw$trend_09) %>%
filter(bw_effect == "negative") %>%
select(-bw_effect)mean_func <- function(data, indices) {
return(mean(data[indices]))
}
boot_pos_df <- data.frame(matrix(nrow = ncol(gifts_funct_bw_pos), ncol = 4))
colnames(boot_pos_df) <- c("function_id", "mean", "ci_05", "ci_95")
boot_neu_df <- data.frame(matrix(nrow = ncol(gifts_funct_bw_neu), ncol = 4))
colnames(boot_neu_df) <- c("function_id", "mean", "ci_05", "ci_95")
boot_neg_df <- data.frame(matrix(nrow = ncol(gifts_funct_bw_neg), ncol = 4))
colnames(boot_neg_df) <- c("function_id", "mean", "ci_05", "ci_95")
for(i in 1:ncol(gifts_funct_bw_pos)){
boot_pos_df[i, "function_id"] <- colnames(gifts_funct_bw_pos)[i]
if(sum(gifts_funct_bw_pos[, i]) == 0){
boot_pos_df[i, "mean"] <- 0
boot_pos_df[i, "ci_05"] <- 0
boot_pos_df[i, "ci_95"] <- 0
}
else{
boot_temp_pos <- boot(gifts_funct_bw_pos[, i], statistic = mean_func, R = 10000)
boot_temp_pos_ci <- boot.ci(boot_temp_pos, type = "bca")
boot_pos_df[i, "mean"] <- boot_temp_pos_ci$t0
boot_pos_df[i, "ci_05"] <- boot_temp_pos_ci$bca[4]
boot_pos_df[i, "ci_95"] <- boot_temp_pos_ci$bca[5]
}
boot_neu_df[i, "function_id"] <- colnames(gifts_funct_bw_neu)[i]
boot_temp_neu <- boot(gifts_funct_bw_neu[, i], statistic = mean_func, R = 10000)
boot_temp_neu_ci <- boot.ci(boot_temp_neu, type = "bca")
boot_neu_df[i, "mean"] <- boot_temp_neu_ci$t0
boot_neu_df[i, "ci_05"] <- boot_temp_neu_ci$bca[4]
boot_neu_df[i, "ci_95"] <- boot_temp_neu_ci$bca[5]
boot_neg_df[i, "function_id"] <- colnames(gifts_funct_bw_neg)[i]
boot_temp_neg <- boot(gifts_funct_bw_neg[, i], statistic = mean_func, R = 10000)
boot_temp_neg_ci <- boot.ci(boot_temp_neg, type = "bca")
boot_neg_df[i, "mean"] <- boot_temp_neg_ci$t0
boot_neg_df[i, "ci_05"] <- boot_temp_neg_ci$bca[4]
boot_neg_df[i, "ci_95"] <- boot_temp_neg_ci$bca[5]
}
boot_df <- data.frame(rbind(boot_pos_df %>% mutate(bw_association = "postive"),
boot_neu_df %>% mutate(bw_association = "non-significant"),
boot_neg_df %>% mutate(bw_association = "negative")))
gifts_funct_df <- data.frame(rbind(gifts_funct_bw_pos %>% mutate(bw_association = "postive"),
gifts_funct_bw_neu %>% mutate(bw_association = "non-significant"),
gifts_funct_bw_neg %>% mutate(bw_association = "negative")))14.3.1 MCI comparisons of biosynthesis pathways
## B01
p_B01_mg <-
ggplot() +
geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = B01, color = bw_association),
alpha = 0.5, width = 0.25) +
geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = B01, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_df %>% filter(function_id == "B01"),
aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","grey50","blue3")) +
scale_fill_manual(values = c("red3","grey50","blue3")) +
xlab("Body-weight association") +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 14),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 14))
## B02
p_B02_mg <-
ggplot() +
geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = B02, color = bw_association),
alpha = 0.5, width = 0.25) +
geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = B02, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_df %>% filter(function_id == "B02"),
aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","grey50","blue3")) +
scale_fill_manual(values = c("red3","grey50","blue3")) +
xlab("Body-weight association") +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 14),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 14))
## B03
p_B03_mg <-
ggplot() +
geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = B03, color = bw_association),
alpha = 0.5, width = 0.25) +
geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = B03, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data=boot_df %>% filter(function_id=="B03"),
aes(ymin = ci_05, ymax = ci_95, x = bw_association,color=bw_association),
linewidth = 4, size = 1, position = position_nudge(x = 0.35))+
scale_color_manual(values = c("red3","grey50","blue3")) +
scale_fill_manual(values = c("red3","grey50","blue3")) +
xlab("Body-weight association") +
theme_minimal()+
theme(legend.position = 'none',
axis.title.x = element_text(size = 14),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 14))
## B04
p_B04_mg <-
ggplot() +
geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = B04, color = bw_association),
alpha = 0.5, width = 0.25)+
geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = B04, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_df %>% filter(function_id == "B04"),
aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","grey50","blue3")) +
scale_fill_manual(values = c("red3","grey50","blue3")) +
xlab("Body-weight association") +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 14),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 14))
## B06
p_B06_mg <-
ggplot() +
geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = B06, color = bw_association),
alpha = 0.5, width = 0.25) +
geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = B06, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_df %>% filter(function_id == "B06"),
aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","grey50","blue3")) +
scale_fill_manual(values = c("red3","grey50","blue3")) +
xlab("Body-weight association") +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 14),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 14))
## B07
p_B07_mg <-
ggplot() +
geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = B07, color = bw_association),
alpha = 0.5, width = 0.25) +
geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = B07, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_df %>% filter(function_id == "B07"),
aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","grey50","blue3")) +
scale_fill_manual(values = c("red3","grey50","blue3")) +
xlab("Body-weight association") +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 14),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 14))
## B08
p_B08_mg <-
ggplot() +
geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = B08, color = bw_association),
alpha = 0.5, width = 0.25) +
geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = B08, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_df %>% filter(function_id == "B08"),
aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","grey50","blue3")) +
scale_fill_manual(values = c("red3","grey50","blue3")) +
xlab("Body-weight association") +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 14),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 14))
## B09
p_B09_mg <-
ggplot() +
geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = B09, color = bw_association),
alpha = 0.5, width = 0.25) +
geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = B09, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_df %>% filter(function_id == "B09"),
aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","grey50","blue3")) +
scale_fill_manual(values = c("red3","grey50","blue3")) +
xlab("Body-weight association") +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 14),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 14))
## B10
p_B10_mg <-
ggplot() +
geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = B10, color = bw_association),
alpha = 0.5, width = 0.25) +
geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = B10, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_df %>% filter(function_id == "B10"),
aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","grey50","blue3")) +
scale_fill_manual(values = c("red3","grey50","blue3")) +
xlab("Body-weight association") +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 14),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 14))
## Avg. MCI Biosynthesis
p_Mci_bio_mg <-
ggplot() +
geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = avg_mci_bio, color = bw_association),
alpha = 0.5, width = 0.25) +
geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = avg_mci_bio, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_df %>% filter(function_id == "avg_mci_bio"),
aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","grey50","blue3")) +
scale_fill_manual(values = c("red3","grey50","blue3")) +
xlab("Body-weight association") +
ylab("Avg. bio. MCI") +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 14),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 14))
## Avg. MCI
p_Mci_mg <-
ggplot() +
geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = avg_mci, color = bw_association),
alpha = 0.5, width = 0.25) +
geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = avg_mci, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_df %>% filter(function_id == "avg_mci"),
aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","grey50","blue3")) +
scale_fill_manual(values = c("red3","grey50","blue3")) +
xlab("Body-weight association") +
ylab("Avg. MCI") +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 14),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 14))14.3.3 All
sup_s4 <- grid.arrange(p_Mci_bio_mg, p_B01_mg, p_B02_mg,
p_B03_mg, p_B04_mg, p_B06_mg,
p_B07_mg, p_B08_mg, p_B09_mg,
p_B10_mg, ncol = 3)14.3.3.1 MCI comparisons of degradation pathways
## D01
p_D01_mg <-
ggplot() +
geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = D01, color = bw_association),
alpha = 0.5, width = 0.25) +
geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = D01, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_df %>% filter(function_id == "D01"),
aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","grey50","blue3")) +
scale_fill_manual(values = c("red3","grey50","blue3")) +
xlab("Body-weight association") +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 14),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 14))
## D02
p_D02_mg <-
ggplot() +
geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = D02, color = bw_association),
alpha = 0.5, width = 0.25) +
geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = D02, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_df %>% filter(function_id == "D02"),
aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","grey50","blue3")) +
scale_fill_manual(values = c("red3","grey50","blue3")) +
xlab("Body-weight association") +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 14),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 14))
## D03
p_D03_mg <-
ggplot() +
geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = D03, color = bw_association),
alpha = 0.5, width = 0.25) +
geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = D03, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_df %>% filter(function_id == "D03"),
aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","grey50","blue3")) +
scale_fill_manual(values = c("red3","grey50","blue3")) +
xlab("Body-weight association") +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 14),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size =14))
## D05
p_D05_mg <-
ggplot() +
geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = D05, color = bw_association),
alpha = 0.5,width = 0.25) +
geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = D05, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_df %>% filter(function_id == "D05"),
aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","grey50","blue3")) +
scale_fill_manual(values = c("red3","grey50","blue3")) +
xlab("Body-weight association") +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 14),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 14))
## D06
p_D06_mg <-
ggplot() +
geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = D06, color = bw_association),
alpha = 0.5, width = 0.25) +
geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = D06, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_df %>% filter(function_id == "D06"),
aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","grey50","blue3")) +
scale_fill_manual(values = c("red3","grey50","blue3")) +
xlab("Body-weight association") +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 14),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 14))
## D07
p_D07_mg <-
ggplot() +
geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = D07, color = bw_association),
alpha = 0.5, width = 0.25) +
geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = D07, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_df %>% filter(function_id == "D07"),
aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","grey50","blue3")) +
scale_fill_manual(values = c("red3","grey50","blue3")) +
xlab("Body-weight association") +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 14),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 14))
## D08
p_D08_mg <-
ggplot() +
geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = D08, color = bw_association),
alpha = 0.5, width = 0.25) +
geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = D08, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_df %>% filter(function_id == "D08"),
aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","grey50","blue3")) +
scale_fill_manual(values = c("red3","grey50","blue3")) +
xlab("Body-weight association") +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 14),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 14))
## D09
p_D09_mg <-
ggplot() +
geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = D09, color = bw_association),
alpha = 0.5, width = 0.25) +
geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = D09, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_df %>% filter(function_id == "D09"),
aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","grey50","blue3")) +
scale_fill_manual(values = c("red3","grey50","blue3")) +
xlab("Body-weight association") +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 14),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 14))
## Avg. MCI Degradation
p_Mci_deg_mg <-
ggplot() +
geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = avg_mci_deg, color = bw_association),
alpha = 0.5, width = 0.25) +
geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = avg_mci_deg, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_df %>% filter(function_id == "avg_mci_deg"),
aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","grey50","blue3")) +
scale_fill_manual(values = c("red3","grey50","blue3")) +
xlab("Body-weight association") +
ylab("Avg. deg. MCI") +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 14),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 14))14.4 Heatmap of MCI at element level
Not included in Supplementary.
gifts_bw_pos <-
gifts %>%
mutate(bw_effect = trends_bw$trend_09) %>%
filter(bw_effect == "positive") %>%
select(-bw_effect)
gifts_bw_neu <-
gifts %>%
mutate(bw_effect = trends_bw$trend_09) %>%
filter(bw_effect == "non-significant") %>%
select(-bw_effect)
n_neg_sp <- 50
gifts_bw_neg <-
gifts %>%
mutate(bw_effect = trends_bw$trend_09, bw_param = trends_bw$parameter) %>%
filter(bw_effect == "negative") %>%
arrange(bw_param) %>%
slice(1:n_neg_sp) %>%
select(-c(bw_effect, bw_param))
gifts_bw_df_long <-
data.frame(rbind(gifts_bw_pos, gifts_bw_neg)) %>%
mutate(bw_association = c(rep("positive", nrow(gifts_bw_pos)), rep("negative", nrow(gifts_bw_neg)))) %>%
select(-matches("S0")) %>%
rownames_to_column(var = "genome") %>%
pivot_longer(!c(genome, bw_association), names_to = "Element", values_to = "mci") %>%
mutate(Function = substr(Element, start = 1, stop = 3)) %>%
relocate(Function, .after = Element)
# Heatmap
gifts_bw_df_long %>%
rename(Code_element = "Element") %>%
rename(Code_function = "Function") %>%
left_join(gift_colors, by = "Code_function") %>%
group_by(Code_element) %>%
filter(sum(mci) > 0) %>%
ungroup() %>%
ggplot(., aes(x = genome, y = Code_element, fill = bw_association, group = legend, alpha = mci)) +
geom_tile(color = "#ffffff") +
scale_y_discrete(guide = guide_axis(check.overlap = TRUE)) +
scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
scale_fill_manual(values = c("red3",
"blue3"),
na.value = "#f4f4f4") +
facet_grid(legend ~ bw_association, scales = "free", space = "free") +
theme_void(base_size = 8) +
theme(axis.text.y = element_blank(),
strip.text.y = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 6),
legend.position = "none")15 Comparing activity of positively and negatively associated bacteria
15.1 Load data
load("data/data_mt.Rdata")
load("data/data_mg.Rdata")
trends_bw <-
read_tsv("data/hmsc_bw_trend.tsv") %>%
mutate(trend_07 = case_when(bw_support > 0.85 ~ 'positive',
bw_support < 0.15 ~ 'negative',
TRUE ~ 'non-significant')) %>%
mutate(trend_09 = case_when(bw_support > 0.95 ~ 'positive',
bw_support < 0.05 ~ 'negative',
TRUE ~ 'non-significant'))
gift_colors <- read_tsv("data/gift_colors.tsv") %>%
mutate(legend = str_c(Code_function, " - ", Function))15.2 Expression profiles of positively and negatively associated bacteria
The resulting plot corresponds to Figure 3c in the manuscript.
15.2.1 Element-based expression table of day 35 individuals
mag_elements <- lapply(expr_counts, function(x) to.elements(x, GIFT_db2))
# Convert correct tibble for applying CLR conversion
mag_elements_merged <-
mag_elements %>%
lapply(function(x) t(x)) %>%
lapply(function(x) as.data.frame(x)) %>%
Map(cbind, ., MAG = names(.)) %>%
lapply(function(x) rownames_to_column(x, "Element")) %>%
do.call(rbind, .) %>%
mutate(Function = substr(Element, start = 1, stop = 3))%>%
as.data.frame() %>%
relocate(MAG, .before = Element) %>%
relocate(Function, .after = Element)
# Select individuals from day 35
metadata_day_35 <-
chicken_metadata_mt %>%
filter(sampling_time == "35")
mag_elements_day_35 <-
mag_elements_merged %>%
select(MAG, Element, Function, metadata_day_35$animal_code)
metadata_mt <-
metadata_day_35 %>%
arrange(match(metadata_day_35$animal_code, colnames(mag_elements_day_35[,-c(1,3)])))
mean(metadata_mt$animal_code == colnames(mag_elements_day_35[,-c(1:3)]))[1] 1
15.2.2 Normalised expression table
normalisation_factor <-
expr_counts %>% # Normalisation is performed with dist_expr because it contains the transcripts for all the functions, not only the functions considered important for the host in distillR.
lapply(function(x) rowSums(x)) %>%
reduce(cbind) %>%
rowSums()
normalisation_factor <- normalisation_factor[match(metadata_mt$animal_code,names(normalisation_factor))]
mean(names(normalisation_factor) == metadata_mt$animal_code)[1] 1
15.2.3 Filter MAGs associated with chicken body weight
positive_mags <-
trends_bw %>%
filter(trend_09 == "positive") %>%
select(genome) %>%
unlist() %>%
as.character()
negative_mags <-
trends_bw %>%
filter(trend_09 == "negative") %>%
select(genome) %>%
unlist() %>%
as.character()
n_neg_sp <- 50 # Filter 50 negative MAGs for the figure
negative_mags_top50 <-
trends_bw %>%
filter(trend_09 == "negative") %>%
arrange(parameter) %>%
slice(1:n_neg_sp) %>%
select(genome) %>%
unlist() %>%
as.character()
mag_subset_elements <-
norm_elements_day_35 %>%
filter(MAG %in% c(positive_mags, negative_mags))
mag_subset_element_top50s <-
norm_elements_day_35 %>%
filter(MAG %in% c(positive_mags, negative_mags_top50))15.2.4 Tidy data
# Filter relative abundance table
mag_weighted_day35 <-
mag_weighted %>%
t() %>%
as.data.frame() %>%
rownames_to_column(var = 'animal_code') %>%
filter(animal_code %in% colnames(mag_subset_elements[, -c(1:3)])) %>%
column_to_rownames(var = "animal_code")
total_day35 <-
t(decostand(mag_weighted_day35, "total")) %>%
as.data.frame() %>%
rownames_to_column(var = "genome") %>%
filter(genome %in% c(positive_mags, negative_mags)) %>%
column_to_rownames(var = "genome") %>%
mutate(mean_abundance = rowMeans(.)) %>%
rownames_to_column(var = "genome") %>%
select(genome, mean_abundance)
mag_subset_elements_2 <-
mag_subset_element_top50s %>%
rename(genome = 'MAG') %>%
left_join(total_day35, by = "genome") %>%
mutate(across(CA01.15:CB24.14, ~ .x / mean_abundance)) %>%
select(-mean_abundance) %>%
mutate(bw_association = if_else(genome %in% positive_mags, "positive", "negative")) %>%
mutate(avg_expr_million = rowMeans(across(CA01.15:CB24.14)) * 1000000) %>%
filter(!grepl("S", Function)) %>%
group_by(Element) %>%
filter(sum(avg_expr_million) > 0) %>%
ungroup() %>%
group_by(genome) %>%
filter(sum(avg_expr_million) > 0) %>%
ungroup()
mag_subset_elements_3 <-
mag_subset_elements %>%
rename(genome = 'MAG') %>%
left_join(total_day35, by = "genome") %>%
mutate(across(CA01.15:CB24.14, ~ .x / mean_abundance)) %>%
select(-mean_abundance) %>%
mutate(bw_association = if_else(genome %in% positive_mags, "positive", "negative")) %>%
mutate(avg_expr_million = rowMeans(across(CA01.15:CB24.14)) * 1000000) %>%
filter(!grepl("S", Function)) %>%
group_by(Element) %>%
filter(sum(avg_expr_million) > 0) %>%
ungroup() %>%
group_by(genome) %>%
filter(sum(avg_expr_million) > 0) %>%
ungroup()15.2.5 Plot
mag_subset_elements_2 %>%
rename(Code_function = "Function") %>%
left_join(gift_colors, by = "Code_function") %>%
ggplot(aes(x = genome, y = Element, fill = bw_association, group = legend, alpha = log(avg_expr_million, 2))) +
geom_tile(color = "#ffffff") +
scale_y_discrete(guide = guide_axis(check.overlap = TRUE)) +
scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
scale_fill_manual(values = c("red3",
"blue3"),
na.value = "#f4f4f4") +
facet_grid(legend ~ bw_association, scales = "free", space = "free") +
theme_void(base_size = 8) +
theme(axis.text.x = element_text(size = 5, angle = 90),
axis.text.y = element_text(size = 5),
strip.text.y = element_blank(),
legend.position = "none")15.3 Taxonomy of reduced MAGs
reduced_mags1 <-
mag_subset_elements_3 %>%
filter(Element == "B0101") %>%
filter(avg_expr_million == 0) %>%
select(genome) %>%
unlist() %>%
as.character()
reduced_mags2 <-
mag_subset_elements_3 %>%
filter(Element == "B0102") %>%
filter(avg_expr_million == 0) %>%
select(genome) %>%
unlist() %>%
as.character()
reduced_mags3 <-
mag_subset_elements_3 %>%
filter(Element == "B0103") %>%
filter(avg_expr_million == 0) %>%
select(genome) %>%
unlist() %>%
as.character()
reduced_mags <- intersect(intersect(reduced_mags1, reduced_mags2), reduced_mags3)
reduced_mags_df <-
mag_subset_elements_3 %>%
filter(genome %in% reduced_mags) %>%
group_by(Element) %>%
filter(sum(avg_expr_million) > 0) %>%
ungroup() %>%
group_by(genome) %>%
filter(sum(avg_expr_million) > 0) %>%
ungroup()%>%
pivot_wider(id_cols = c(genome, bw_association), names_from = Element, values_from = avg_expr_million)All reduced MAGs with positive association belong to RF39
positives <-
reduced_mags_df %>%
filter(bw_association == "positive") %>%
select(genome) %>%
unlist() %>%
as.character()
genome_taxonomy[genome_taxonomy$genome %in% positives,] %>%
select(order) %>%
table()order
RF39
10
family
UBA660
10
Reduced MAGs with negative association belong to various orders, mainly to Christensenellales
negatives <-
reduced_mags_df %>%
filter(bw_association == "negative") %>%
select(genome) %>%
unlist() %>%
as.character()
genome_taxonomy[genome_taxonomy$genome %in% negatives,] %>%
select(order) %>%
table()order
Bacteroidales Christensenellales Coriobacteriales Lachnospirales Monoglobales_A Oscillospirales Saccharimonadales
1 12 1 1 1 1 1
family
Acutalibacteraceae Atopobiaceae Borkfalkiaceae Lachnospiraceae Rikenellaceae Saccharimonadaceae UBA1242 UBA1381
1 1 1 1 1 1 11 1
15.4 Genomic characteristics of genome reduced MAGs
The resulting plot corresponds to Figure 4a.
gifts <-
gifts_elements %>%
as.data.frame() %>%
select(where(~ sum(.) > 0))
mci_df <-
gifts %>%
filter(rownames(.) %in% reduced_mags_df$genome) %>%
mutate(avg_mci = rowMeans(.)) %>%
rownames_to_column(var = "genome") %>%
left_join(reduced_mags_df, by = "genome") %>%
left_join(genome_taxonomy, by = "genome") %>%
filter(family=="UBA1242"| order=="RF39") %>%
select(genome, avg_mci, bw_association)
length_df <-
genome_stats %>%
filter(genome %in% reduced_mags_df$genome) %>%
left_join(mci_df, by = "genome") %>%
left_join(genome_taxonomy, by = "genome") %>%
filter(family=="UBA1242"|order=="RF39") %>%
select(genome, mag_length, bw_association)
mean_func <- function(data, indices) {
return(mean(data[indices]))
}
boot_mci_df <- data.frame(matrix(nrow = 2, ncol = 4))
colnames(boot_mci_df) <-c ("bw_association","mean","ci_025","ci_975")
boot_mci_df$bw_association <- c("positive","negative")
bw_pos_redMAG_mci_boot <- boot(mci_df$avg_mci[mci_df$bw_association == "positive"], statistic = mean_func, R = 10000)
bw_pos_redMAG_mci_boot_ci <- boot.ci(bw_pos_redMAG_mci_boot, type = "bca")
bw_neg_redMAG_mci_boot<-boot(mci_df$avg_mci[mci_df$bw_association == "negative"], statistic = mean_func, R = 10000)
bw_neg_redMAG_mci_boot_ci<-boot.ci(bw_neg_redMAG_mci_boot, type = "bca")
boot_mci_df[1,"mean"] <- bw_pos_redMAG_mci_boot$t0
boot_mci_df[1,"ci_025"] <- bw_pos_redMAG_mci_boot_ci$bca[,4]
boot_mci_df[1,"ci_975"] <- bw_pos_redMAG_mci_boot_ci$bca[,5]
boot_mci_df[2,"mean"] <- bw_neg_redMAG_mci_boot$t0
boot_mci_df[2,"ci_025"] <- bw_neg_redMAG_mci_boot_ci$bca[,4]
boot_mci_df[2,"ci_975"] <- bw_neg_redMAG_mci_boot_ci$bca[,5]mci_df %>%
ggplot()+
geom_jitter(aes(x = bw_association, y = avg_mci, color = bw_association),
alpha = 0.5, width = 0.25) +
geom_boxplot(aes(x = bw_association, y = avg_mci, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_mci_df,
aes(ymin = ci_025, ymax = ci_975, x = bw_association, color = bw_association),
linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","blue3")) +
scale_fill_manual(values = c("red3","blue3")) +
ylab("Avg. MCI")+
xlab("Body-weight association")+
theme_minimal()+
theme(legend.position = 'none',
axis.title.x = element_text(size = 12),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10),
axis.title.y = element_text(size = 12)) 15.4.1 Genome length of genome reduced MAGss
The resulting plot corresponds to Figure 4b.
boot_length_df <- data.frame(matrix(nrow = 2, ncol = 4))
colnames(boot_length_df) <- c("bw_association", "mean", "ci_025", "ci_975")
boot_length_df$bw_association <- c("positive", "negative")
bw_pos_red_mag_length_boot <- boot(length_df$mag_length[length_df$bw_association == "positive"], statistic = mean_func, R = 10000)
bw_pos_red_mag_length_boot_ci <- boot.ci(bw_pos_red_mag_length_boot, type = "bca")
bw_neg_red_mag_length_boot <- boot(length_df$mag_length[length_df$bw_association == "negative"], statistic = mean_func, R = 10000)
bw_neg_red_mag_length_boot_ci <- boot.ci(bw_neg_red_mag_length_boot, type = "bca")
boot_length_df[1, "mean"] <- bw_pos_red_mag_length_boot$t0
boot_length_df[1, "ci_025"] <- bw_pos_red_mag_length_boot_ci$bca[,4]
boot_length_df[1, "ci_975"] <- bw_pos_red_mag_length_boot_ci$bca[,5]
boot_length_df[2, "mean"] <- bw_neg_red_mag_length_boot$t0
boot_length_df[2, "ci_025"] <- bw_neg_red_mag_length_boot_ci$bca[,4]
boot_length_df[2, "ci_975"] <- bw_neg_red_mag_length_boot_ci$bca[,5]length_df %>%
ggplot() +
geom_jitter(aes(x = bw_association, y = mag_length, color = bw_association),
alpha = 0.5, width = 0.25) +
geom_boxplot(aes(x = bw_association, y = mag_length, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_length_df,
aes(ymin = ci_025, ymax = ci_975, x = bw_association, color = bw_association),
linewidth = 4, size = 1,position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","blue3")) +
scale_fill_manual(values = c("red3","blue3")) +
ylab("MAG length") +
xlab("Body-weight association") +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 12),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10),
axis.title.y = element_text(size = 12)) 15.4.2 Expression profiles of genome reduced MAGs
The resulting plot corresponds to Figure 4c.
comp_reduced_mags <-
reduced_mags_df %>%
left_join(genome_taxonomy, by = 'genome') %>%
filter(family=="UBA1242"|order=="RF39") %>%
select(-domain:-species)
reduced_mags_dist <- vegdist(sqrt(comp_reduced_mags[, -c(1:3)]), method = "bray")
pcoa_reduced_mags <- cmdscale(reduced_mags_dist, eig = TRUE, k = 3, add = TRUE)
pcoa_reduced_mags_df <-
pcoa_reduced_mags$points %>%
as.data.frame() %>%
mutate(genome = comp_reduced_mags$genome) %>%
mutate(bw_association = comp_reduced_mags$bw_association) %>%
rename(PCOA1 = 'V1', PCOA2 = 'V2', PCOA3 = 'V3')
envfit_gifts <- envfit(pcoa_reduced_mags_df[, 1:2], sqrt(comp_reduced_mags[, -c(1,2)]), permutations = 999)
envfit_gifts_df <-
as.data.frame(scores(envfit_gifts, display = "vectors")) %>%
rownames_to_column(.,var = "gift_id") %>%
mutate(pval = envfit_gifts$vectors$pvals) %>%
filter(pval < 0.05) %>%
select(gift_id, PCOA1, PCOA2)ggplot() +
geom_point(data = pcoa_reduced_mags_df,
aes(x = PCOA1, y = PCOA2, color = bw_association),
size = 2,
shape = 16,
alpha = 0.8) +
scale_color_manual(values = c("red3","blue3")) +
geom_segment(data = envfit_gifts_df,
aes(x = 0, xend = PCOA1*0.5, y = 0 , yend = PCOA2*0.5),
arrow = arrow(length = unit(0.25, "cm")),
color="gray20",
linewidth = 0.2) +
geom_label_repel(data = envfit_gifts_df, aes(x = PCOA1*0.5, y = PCOA2*0.5, label = gift_id), size = 1, max.overlaps = 10) +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10))15.4.3 Activity heatmap of genome reduced bacteria
mag_subset_elements_3 %>%
rename(Code_element = "Element") %>%
rename(Code_function = "Function") %>%
left_join(GIFT_db2 %>% select (Code_element, Code_function, Element, Function)) %>%
left_join(gift_colors, by = "Code_function") %>%
left_join(genome_taxonomy, by = 'genome') %>%
filter(family == "UBA1242" | family == "UBA660") %>%
select(-domain:-species) %>%
group_by(Element) %>%
filter(sum(avg_expr_million) > 0) %>%
ungroup() %>%
group_by(genome) %>%
filter(sum(avg_expr_million) > 0) %>%
ungroup() %>%
ggplot(aes(x = genome,
y = Code_element,
fill = bw_association,
group = legend,
alpha = log(avg_expr_million, 2))) +
geom_tile(color = "#ffffff") +
scale_y_discrete(guide = guide_axis(check.overlap = TRUE)) +
scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
scale_fill_manual(values = c("red3",
"blue3"),
na.value = "#f4f4f4") +
facet_grid(legend ~ bw_association, scales = "free", space = "free") +
theme_void(base_size = 8) +
theme(axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1),
axis.text.y = element_text(size = 5, vjust = 0, hjust = 1),
strip.text.y = element_blank(),
legend.position = "none")University of Copenhagen, antton.alberdi@sund.ku.dk↩︎